You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

graph_mem_assigner.cc 106 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/memory/graph_mem_assigner.h"
  17. #include <cstring>
  18. #include <set>
  19. #include "common/math/math_util.h"
  20. #include "common/util/error_manager/error_manager.h"
  21. #include "framework/common/debug/ge_log.h"
  22. #include "framework/common/debug/log.h"
  23. #include "graph/build/memory/hybrid_mem_assigner.h"
  24. #include "graph/build/memory/var_mem_assign_util.h"
  25. #include "graph/build/memory/block_mem_assigner.h"
  26. #include "graph/common/omg_util.h"
  27. #include "graph/debug/ge_attr_define.h"
  28. #include "graph/ge_attr_value.h"
  29. #include "graph/manager/graph_var_manager.h"
  30. #include "graph/utils/tensor_utils.h"
  31. #include "graph/utils/type_utils.h"
  32. #include "graph/build/memory/buffer_pool_mem_assigner.h"
  33. namespace {
  34. const int kAllInputAddrIsAtomic = -1;
  35. const int kVirtualInputNodeMemoryReuse = 0;
  36. const int kVirtualOutputNodeMemoryReuse = 1;
  37. const int kPrevNextDistanceNum = 2;
  38. const int64_t kInvalidStream = -1;
  39. const char *const kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE";
  40. // One state per bit cannot be repeated
  41. enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 };
  42. int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
  43. const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
  44. const ge::NodePtr &node, const uint32_t i) {
  45. ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  46. auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  47. if (iter1 == anchor_to_symbol.end()) {
  48. return ge::kInvalidOffset;
  49. }
  50. auto out_symbol = iter1->second;
  51. auto iter2 = symbol_to_anchors.find(out_symbol);
  52. if (iter2 == symbol_to_anchors.end()) {
  53. return ge::kInvalidOffset;
  54. }
  55. for (const auto &node_index_io : iter2->second) {
  56. if (node_index_io.value_ == out_symbol) {
  57. vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
  58. vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
  59. if (node_index_io.index_ >= symbol_output_list.size()) {
  60. return ge::kInvalidOffset;
  61. }
  62. GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
  63. output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
  64. return symbol_output_list.at(node_index_io.index_);
  65. }
  66. }
  67. return ge::kInvalidOffset;
  68. }
  69. } // namespace
  70. namespace ge {
  71. Status VariableMemoryAssigner::Assign() {
  72. Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  73. if (result != ge::SUCCESS) {
  74. return result;
  75. }
  76. result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  77. if (result != ge::SUCCESS) {
  78. return result;
  79. }
  80. return ge::SUCCESS;
  81. }
  82. Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  83. Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  84. if (result != ge::SUCCESS) {
  85. return result;
  86. }
  87. return ge::SUCCESS;
  88. }
  89. Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() {
  90. Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_);
  91. if (result != ge::SUCCESS) {
  92. return result;
  93. }
  94. return ge::SUCCESS;
  95. }
  96. Status GraphMemoryAssigner::AssignMemory() {
  97. ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
  98. if (mem_assigner->Assign() != ge::SUCCESS) {
  99. GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s",
  100. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  101. return ge::FAILED;
  102. }
  103. MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  104. memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  105. if (mem_assigner->GetP2PMemOffset() >= 0) {
  106. MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
  107. memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  108. }
  109. auto session_id = compute_graph_->GetSessionID();
  110. int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  111. auto variable_assigner =
  112. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  113. if (variable_assigner == nullptr) {
  114. GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
  115. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  116. REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
  117. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  118. return ge::FAILED;
  119. }
  120. if (variable_assigner->Assign() != ge::SUCCESS) {
  121. return ge::FAILED;
  122. }
  123. int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  124. GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  125. mem_assigner_ = std::move(mem_assigner);
  126. return ge::SUCCESS;
  127. }
  128. ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  129. auto variable_assigner =
  130. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  131. if (variable_assigner == nullptr) {
  132. GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
  133. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  134. REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
  135. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  136. return ge::FAILED;
  137. }
  138. if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
  139. return ge::FAILED;
  140. }
  141. return ge::SUCCESS;
  142. }
  143. ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
  144. auto variable_assigner =
  145. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  146. if (variable_assigner == nullptr) {
  147. GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
  148. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  149. REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
  150. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  151. }
  152. if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) {
  153. return ge::FAILED;
  154. }
  155. return ge::SUCCESS;
  156. }
// Computes two sizes for one output tensor:
//  - out_size: the complete tensor size as recorded in the tensor desc;
//  - output_mem_size: the memory size of one "piece" obtained by flattening
//    every dimension before dim_index to 1 (batch_dim_num accumulates the
//    product of those flattened dims; callers pass it in pre-initialized).
// Returns FAILED on lookup/calc errors, an out-of-range dim_index, or a
// negative calculated size (e.g. dynamic shape).
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                             int64_t dim_index, int64_t &output_mem_size,
                                             int64_t &batch_dim_num, int64_t &out_size) {
  // Full tensor size straight from the descriptor.
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "[Get][TensorSize]");
    REPORT_CALL_ERROR("E19999", "Get tensor size failed");
    return FAILED;
  }
  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s",
                       dim_index, output_dims.size(), output_shape.ToString().c_str());
    GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s",
           dim_index, output_dims.size(), output_shape.ToString().c_str());
    return FAILED;
  }
  // Fold all leading dims (before dim_index) into batch_dim_num and set them
  // to 1, so the remaining shape describes a single batch element.
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);  // overflow guard
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }
  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();
  // Size of one piece with the flattened shape.
  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "[Calc][TensorSize]");
    return FAILED;
  }
  // A negative result typically means a dynamic (unknown) shape.
  if (output_mem_size < 0) {
    REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. "
                       "shape:%s, format:%s, dtype:%s, maybe has dynamic shape",
                       output_mem_size,
                       output_shape.ToString().c_str(),
                       TypeUtils::FormatToSerialString(out_format).c_str(),
                       TypeUtils::DataTypeToSerialString(data_type).c_str());
    GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, "
           "maybe has dynamic shape",
           output_mem_size,
           output_shape.ToString().c_str(),
           TypeUtils::FormatToSerialString(out_format).c_str(),
           TypeUtils::DataTypeToSerialString(data_type).c_str());
    return FAILED;
  }
  return SUCCESS;
}
  205. Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
  206. if (memory_offset_.empty()) {
  207. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s",
  208. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  209. GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
  210. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  211. return ge::FAILED;
  212. }
  213. GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  214. GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  215. GE_CHK_STATUS_RET(AssignBufferPoolMemory(), "AssignBufferPoolMemory Failed!");
  216. size_t total_mem_offset = 0;
  217. for (auto pair : memory_offset_) {
  218. mem_type_to_offset[pair.first] = pair.second.mem_offset_;
  219. total_mem_offset += pair.second.mem_offset_;
  220. }
  221. auto session_id = compute_graph_->GetSessionID();
  222. if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
  223. GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, "
  224. "graph_id:%u, graph_name:%s, reduce your batchsize or scale your model may solve problem",
  225. total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(),
  226. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  227. for (auto iter : mem_type_to_offset) {
  228. ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
  229. {std::to_string(iter.first), std::to_string(iter.second), "featuremap",
  230. std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
  231. GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
  232. iter.second, iter.first);
  233. }
  234. return ge::FAILED;
  235. }
  236. return SUCCESS;
  237. }
// Appends all zero-copy memory blocks after the current HBM offset, records
// the total zero-copy size in zero_mem_copy_size, and syncs the new HBM offset
// back into memory_offset_.
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  // NOTE(review): relies on the priority assigner kept alive by AssignMemory;
  // (sic) "GetPriorityAssinger" is the upstream spelling.
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  if (priority_assigner == nullptr) {
    REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s",
                       compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  // Remember where zero-copy memory starts so the total size can be computed.
  size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  // set offset for zero copy block
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
    mem_offset[RT_MEMORY_HBM] += memory_block->Size();
    memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  }
  // set offset for zero copy nodes
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  // Mirror the advanced HBM offset into the assigner's own bookkeeping.
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
         zero_mem_copy_size);
  return SUCCESS;
}
  274. uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
  275. if (op_desc == nullptr) {
  276. return 0;
  277. };
  278. bool is_continuous = false;
  279. uint32_t continuous_type = 0;
  280. // If GetBool fail, is_continuous is false.
  281. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous);
  282. if (is_continuous) {
  283. continuous_type |= kTypeInput;
  284. } else {
  285. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous);
  286. if (is_continuous) {
  287. bool attr_reuse = false;
  288. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  289. if (attr_reuse) {
  290. continuous_type |= kTypeInputNoPadding;
  291. }
  292. }
  293. }
  294. is_continuous = false;
  295. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous);
  296. if (is_continuous) {
  297. continuous_type |= kTypeOutput;
  298. } else {
  299. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous);
  300. if (is_continuous) {
  301. bool attr_reuse = false;
  302. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  303. if (attr_reuse) {
  304. continuous_type |= kTypeOutputNoPadding;
  305. }
  306. }
  307. }
  308. if (continuous_type != 0) {
  309. GELOGI("[Get][MemType:Continuous]Current node %s, value is %d", op_desc->GetName().c_str(), continuous_type);
  310. }
  311. return continuous_type;
  312. }
  313. Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
  314. int64_t &tensor_size, int64_t &nopadding_size) {
  315. if ((op_desc == nullptr) || (output_desc == nullptr)) {
  316. REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, not expected");
  317. GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr");
  318. }
  319. tensor_size = 0;
  320. nopadding_size = 0;
  321. bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
  322. if (is_nopadding) {
  323. int64_t attr_dim_index;
  324. bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  325. if (!get_attr_dim_flag) {
  326. REPORT_INNER_ERROR("E19999", "Get Attr:%s failed, op_name:%s",
  327. ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
  328. GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s",
  329. ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
  330. return FAILED;
  331. }
  332. // Calculate tensor real size of each piece of data and out size of complete data
  333. int64_t batch_dim_num = 1;
  334. if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
  335. SUCCESS) {
  336. REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s",
  337. attr_dim_index, op_desc->GetName().c_str());
  338. GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld",
  339. op_desc->GetName().c_str(), attr_dim_index);
  340. return FAILED;
  341. }
  342. } else {
  343. if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
  344. REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str());
  345. GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str());
  346. return FAILED;
  347. }
  348. }
  349. if ((tensor_size < 0) || (nopadding_size < 0)) {
  350. REPORT_INNER_ERROR("E19999", "GetMemorySize fail, "
  351. "tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
  352. tensor_size, nopadding_size, op_desc->GetName().c_str());
  353. GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
  354. tensor_size, nopadding_size, op_desc->GetName().c_str());
  355. return FAILED;
  356. }
  357. return SUCCESS;
  358. }
  359. void AlignMemOffset(int64_t &mem_align_size) {
  360. if (mem_align_size <= 0) {
  361. return;
  362. }
  363. mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
  364. }
  365. bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) {
  366. bool is_peer_output_continuous = false;
  367. // If GetBool fail, is_peer_output_continuous is false.
  368. (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
  369. // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and
  370. // continuous output of the previous node is the same, we can support it. If size != 1, there may be
  371. // conflict between the two, we can not support it.
  372. auto peer_output_size = peer_op_desc->GetOutputsSize();
  373. GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
  374. std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
  375. " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
  376. " requires continuous output. There may be conflict between the two." +
  377. "This node is not supported now.";
  378. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  379. return true;);
  380. bool is_peer_reference = false;
  381. // If GetBool fail, is_peer_reference is false.
  382. (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
  383. GE_IF_BOOL_EXEC(is_peer_reference,
  384. std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
  385. " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
  386. " is ref. There may be conflict between the two.";
  387. GELOGW("%s", warning.c_str());
  388. return false;);
  389. return false;
  390. }
  391. /// op1 -> node -> op2
  392. /// return true when node is ref from input, and op1 or op2 is reuse input from output
  393. bool GraphMemoryAssigner::IsRefFromInputOpCascade(const NodePtr &node) {
  394. bool ref_from_input = false;
  395. int32_t reuse_in_index = -1;
  396. for (const auto &out_anchor : node->GetAllOutDataAnchors()) {
  397. ref_from_input = GraphUtils::IsRefFromInput(out_anchor, reuse_in_index);
  398. if (ref_from_input) {
  399. GELOGD("IsRefFromInputOpCascade: cur node:%s:%d is ref", node->GetName().c_str(), reuse_in_index);
  400. break;
  401. }
  402. }
  403. for (const auto &in_anchor : node->GetAllInDataAnchors()) {
  404. const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor();
  405. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  406. if (ref_from_input && GraphUtils::IsRefFromInput(peer_out_anchor, reuse_in_index)) {
  407. GELOGD("IsRefFromInputOpCascade: in node[%s] is ref, reuse index is:%d",
  408. peer_out_anchor->GetOwnerNode()->GetName().c_str(), reuse_in_index);
  409. return true;
  410. }
  411. }
  412. for (const auto &out_anchor : node->GetAllOutDataAnchors()) {
  413. const auto &peer_in_anchors = out_anchor->GetPeerInDataAnchors();
  414. for (const auto &peer_in_anchor : peer_in_anchors) {
  415. auto peer_in_node = peer_in_anchor->GetOwnerNode();
  416. GE_IF_BOOL_EXEC(peer_in_node == nullptr, continue);
  417. for (const auto &peer_in_node_out_anchor : peer_in_node->GetAllOutDataAnchors()) {
  418. if (ref_from_input && GraphUtils::IsRefFromInput(peer_in_node_out_anchor, reuse_in_index)) {
  419. GELOGD("IsRefFromInputOpCascade: out node[%s] is ref, reuse index is:%d",
  420. peer_in_node_out_anchor->GetOwnerNode()->GetName().c_str(), reuse_in_index);
  421. return true;
  422. }
  423. }
  424. }
  425. }
  426. return false;
  427. }
/// node:in0(in0 reuse out0) -> peer_node:out0
/// update peer_node's 0th output offset with node's 0th output offset
// For every (output -> reused input) pair of node, copies node's output
// offset backwards onto the upstream producer's corresponding output offset.
// Out-of-range indexes are logged and skipped rather than treated as errors.
Status GraphMemoryAssigner::UpdateRefOpOffsetReverse(const NodePtr &node) {
  map<int32_t, int32_t> out2ins;
  GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node:%s",
                    node->GetName().c_str());
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  for (const auto &out2in : out2ins) {
    // Walk from the reused input anchor back to the producing node/anchor.
    auto reuse_in_anchor = node->GetInDataAnchor(out2in.second);
    GE_CHECK_NOTNULL(reuse_in_anchor);
    auto peer_out_anchor = reuse_in_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_anchor);
    auto peer_node = peer_out_anchor->GetOwnerNode();
    GE_CHECK_NOTNULL(peer_node);
    auto peer_op_desc = peer_node->GetOpDesc();
    GE_CHECK_NOTNULL(peer_op_desc);
    vector<int64_t> peer_output_list = peer_op_desc->GetOutputOffset();
    // Bounds check both offset lists before copying; skip bad entries.
    if ((peer_out_anchor->GetIdx() >= static_cast<int>(peer_output_list.size()))
        || (out2in.first >= static_cast<int32_t>(output_list.size()))) {
      GELOGW("out of range, peer_out_anchor:%d, peer_output_list size:%zu, out2in:%d, output_list size:%zu",
             peer_out_anchor->GetIdx(),
             peer_output_list.size(),
             out2in.first,
             output_list.size());
      continue;
    }
    // Propagate node's output offset back onto the producer's output.
    peer_output_list.at(peer_out_anchor->GetIdx()) = output_list.at(out2in.first);
    peer_op_desc->SetOutputOffset(peer_output_list);
    GELOGD("UpdateRefOpOffsetReverse: Node[%s] output[%d] is set from node[%s] output index[%d] offset[%ld]",
           peer_node->GetName().c_str(),
           peer_out_anchor->GetIdx(),
           node->GetName().c_str(),
           out2in.first,
           output_list.at(out2in.first));
  }
  return SUCCESS;
}
/// Two-phase pass over the whole graph:
///   1. forward traversal assigns continuous input/output memory where possible,
///      deferring ref-cascade nodes and not-directly-assignable continuous-input
///      nodes onto a stack;
///   2. the stack is drained (pop order == reverse traversal order) to assign
///      the deferred nodes, whose offsets depend on neighbours assigned in
///      phase 1.
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  Status ret;
  // Stored nodes which need assign continuous input memory in `reverse topo order`
  std::vector<NodePtr> nodes_stack;
  // Cache: node -> continuous-memory type bits, computed at most once per node.
  std::map<NodePtr, uint32_t> node_2_continuous_type;
  // Traverse nodes
  for (auto &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    uint32_t continuous_type;
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      continuous_type = GetContinuousMemoryType(node->GetOpDesc());
      node_2_continuous_type.emplace(node, continuous_type);
    } else {
      continuous_type = iter->second;
    }
    // Assign continuous input memory
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (IsRefFromInputOpCascade(node)) {
      // Ref-cascade nodes are deferred to phase 2.
      nodes_stack.push_back(node);
      GELOGD("Ref: Push node:%s to stack", node->GetName().c_str());
    } else if (continuous_input) {
      if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
        GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
                          "[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str())
      } else {
        // Not directly assignable yet; defer to phase 2 like the ref case.
        nodes_stack.push_back(node);
        GELOGD("Continuous: Push node:%s to stack", node->GetName().c_str());
      }
    }
    // Assign continuous output memory
    int64_t memory_type = RT_MEMORY_HBM;
    bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
    if (continuous_output) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"),
                        "[Get][MemType]fail for node:%s", node->GetName().c_str());
      ret = AssignContinuousOutputMemory(node, memory_type, continuous_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "[Assign][Memory:Continuous:Ouput]fail for node:%s", node->GetName().c_str());
        return ret;
      }
    }
  }
  // Assign continuous input memory in `reverse topo order` which stored before
  while (!nodes_stack.empty()){
    auto node = nodes_stack.back();
    nodes_stack.pop_back();
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      REPORT_INNER_ERROR("E19999", "Get ContinuousType from node_2_continuous_type map failed for node:%s",
                         node->GetName().c_str());
      GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
    if (((iter->second & kTypeInput) != 0) || ((iter->second & kTypeInputNoPadding) != 0)) {
      // Third argument `true` enables reverse refresh (see AssignContinuousInputMemory).
      GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
                        "[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str())
    } else {
      // Pure ref node without continuous input: propagate its offsets upstream.
      GE_CHK_STATUS_RET(UpdateRefOpOffsetReverse(node),
                        "[Update][Memory:Reference:Output]fail for node:%s", node->GetName().c_str())
    }
  }
  for (auto pair : memory_offset_) {
    GELOGD("[Reassign][Memory:Continuous]At last, memory type = %ld, mem offset = %zu", pair.first,
           pair.second.mem_offset_);
  }
  return ge::SUCCESS;
}
/// Lay out the producer outputs feeding `node` back-to-back so the op sees one
/// continuous input buffer.
/// @param continuous_mem_start [out] start offset of the continuous range
/// @param continuous_mem_size  [out] total size of the continuous range
/// @param memory_type          key into memory_offset_ (e.g. RT_MEMORY_HBM)
/// @param continuous_type      bit flags (kTypeInput / kTypeInputNoPadding / ...)
/// @param reverse_refresh      when true and the input block was pre-allocated,
///                             the first producer's offset may be refreshed from
///                             this node's own output offset
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
    int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
  GELOGI("[Assign][Memory:Input:Continuous]start for Current node %s", node->GetName().c_str());
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
                       "for node:%s, ", memory_type, node->GetName().c_str());
    GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
           memory_type, node->GetName().c_str());
    return FAILED;
  }
  // The head and tail of hcom continuous input should be added 512
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  continuous_mem_start = iter->second.mem_offset_;
  // Running cursor; written back to memory_offset_ only if we actually allocate.
  int64_t mem_offset = iter->second.mem_offset_;
  int64_t extra_memory_size = 0;
  bool is_continuous_input_allocated = false;
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  vector<int64_t> output_list_this = op_desc->GetOutputOffset();
  if (output_list_this.empty()) {
    REPORT_INNER_ERROR("E19999", "No output offset in node :%s, not expected",
                       node->GetName().c_str());
    GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str());
    return FAILED;
  }
  // If the attr is absent, is_continuous_input_allocated stays false.
  (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;);
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    int64_t real_size = 0;
    // lx (buffer) fusion overrides the size with per-output fusion offsets.
    std::vector<int64_t> offsets_of_fusion = {};
    bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion);
    lx_fusion = lx_fusion && !offsets_of_fusion.empty();
    if (lx_fusion) {
      if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) {
        std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) +
            " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
            " is out of range:" + FmtToStr(offsets_of_fusion.size());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()];
      tensor_desc_size = nopadding_size;
    } else {
      if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()),
                        continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) {
        return FAILED;
      }
    }
    bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion;
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) {
      std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) +
          " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
          " is out of range:" + FmtToStr(output_list.size());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    // when continuous input has been allocated first input is beginning offset
    bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
    if (is_allocated_first_input) {
      std::map<int32_t, int32_t> out2ins;
      GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str());
      // output is beginning offset, set offset for input; only support this case now
      if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
        auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
        output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
        peer_op_desc->SetOutputOffset(output_list);
        GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld",
               node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second,
               peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset);
      } else {
        GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
               out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
      }
      // first input is beginning offset
      mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
      continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
    } else {
      // set offset for input
      output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset;
      peer_op_desc->SetOutputOffset(output_list);
    }
    int64_t align_size = tensor_desc_size;
    if (is_nopadding) {
      // No-padding: advance by the packed size; the padding difference is
      // accumulated once at the end of the range.
      mem_offset += nopadding_size;
      extra_memory_size += (tensor_desc_size - nopadding_size);
      real_size = nopadding_size;
    } else {
      ge::AlignMemOffset(align_size);
      mem_offset += align_size;
      // The head and tail of hcom continuous input should be added 512
      extra_memory_size = MEM_ALIGN_SIZE;
      real_size = tensor_desc_size;
    }
    GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
           "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(),
           peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
           output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
           is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
  }
  mem_offset += extra_memory_size;
  ge::AlignMemOffset(mem_offset);
  continuous_mem_size = mem_offset - continuous_mem_start;
  if (is_continuous_input_allocated) {
    // not allocate memory here, so no need add 512 in header
    iter->second.mem_offset_ -= MEM_ALIGN_SIZE;
  } else {
    iter->second.mem_offset_ = mem_offset;
  }
  return SUCCESS;
}
  654. Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
  655. auto in_data_anchor_list = node->GetAllInDataAnchors();
  656. if (in_data_anchor_list.empty()) {
  657. REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expect",
  658. node->GetName().c_str());
  659. GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str());
  660. return FAILED;
  661. }
  662. auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  663. GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr,
  664. REPORT_INNER_ERROR("E19999", "PeerAcnhor is null, not expect for node:%s",
  665. node->GetName().c_str());
  666. GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str());
  667. return ge::FAILED);
  668. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  669. GE_IF_BOOL_EXEC(peer_op_desc == nullptr,
  670. REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, not expect for node:%s",
  671. node->GetName().c_str());
  672. GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str());
  673. return ge::FAILED);
  674. vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  675. if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
  676. REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, judge invalid for node:%s",
  677. peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
  678. GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s",
  679. peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
  680. return FAILED;
  681. }
  682. mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
  683. return SUCCESS;
  684. }
  685. Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type,
  686. uint32_t continuous_type) {
  687. GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  688. auto out_op_desc = node->GetOpDesc();
  689. GE_IF_BOOL_EXEC(out_op_desc == nullptr,
  690. REPORT_INNER_ERROR("E19999", "OpDesc is null, not expect for node:%s",
  691. node->GetName().c_str());
  692. GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str()));
  693. vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  694. if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
  695. REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s",
  696. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  697. GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
  698. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  699. return ge::FAILED;
  700. }
  701. int64_t mem_offset = 0;
  702. bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0);
  703. if (is_nopadding) {
  704. // out tensor memory must be reused input tensor memory
  705. if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) {
  706. return ge::FAILED;
  707. }
  708. } else {
  709. // Get the reference type of the node, default is false
  710. bool is_ref = false;
  711. // If GetBool fail, is_ref is false.
  712. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
  713. // If the output is ref type and refers to the ref of an input, the name of the output
  714. // and the input are the same. Ge encounters ref type, finds matching relationship according
  715. // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast
  716. if (is_ref) {
  717. GELOGI("Current node %s no needs assign continuous output because reference input by name.",
  718. node->GetName().c_str());
  719. return SUCCESS;
  720. }
  721. mem_offset = output_list[0];
  722. }
  723. for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  724. output_list[out_data_anchor->GetIdx()] = mem_offset;
  725. int64_t tensor_desc_size = 0;
  726. int64_t nopadding_size = 0;
  727. if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type,
  728. tensor_desc_size, nopadding_size) != ge::SUCCESS) {
  729. return FAILED;
  730. }
  731. if (is_nopadding) {
  732. mem_offset += nopadding_size;
  733. } else {
  734. mem_offset += tensor_desc_size;
  735. ge::AlignMemOffset(mem_offset);
  736. }
  737. GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
  738. " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
  739. out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
  740. output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
  741. is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
  742. }
  743. out_op_desc->SetOutputOffset(output_list);
  744. return ge::SUCCESS;
  745. }
/// Assign memory for all atomic nodes (nodes whose output memory must be
/// zero-cleared before execution). Nodes are grouped per dynamic-batch label;
/// every batch restarts from the same base offset and the final HBM offset is
/// the maximum across batches, so batches can share the same physical region.
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  // key:dynamic batch, batch name
  map<string, map<NodePtr, vector<NodePtr>>> normal_atomic_and_clean_nodes_map;
  map<string, vector<NodePtr>> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return status;
  }
  auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  // Phase 1: normal atomic nodes, grouped by their AtomicAddrClean op.
  int64_t batch_atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  int64_t batch_max_mem_offset = batch_atomic_mem_start;
  for (auto &iter_batch : normal_atomic_and_clean_nodes_map) {
    // Every batch label starts from the same base so batches overlap in memory.
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    for (auto &iter : iter_batch.second) {
      int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
      GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);
      for (auto &atomic_node : iter.second) {
        vector<int64_t> mem_offset_end;
        status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
        if (status != SUCCESS) {
          GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
                 atomic_node->GetName().c_str());
          return status;
        }
      }
      // Tell the clean op (iter.first) which [start, size) range it must zero.
      int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
      if (atomic_mem_size != 0) {
        GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM),
                          "[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str());
      }
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  // Phase 2: atomic nodes that connect to NetOutput, starting after phase 1's max.
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  batch_atomic_mem_start = batch_max_mem_offset;
  for (auto &iter_batch : connecting_output_atomic_nodes) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed."
             "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
      return FAILED;
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  return SUCCESS;
}
/// Classify atomic nodes for memory assignment by walking every AtomicAddrClean
/// op's control successors:
///   - normal atomic nodes -> normal_atomic_nodes_map[batch_label][clean_node]
///   - atomic nodes that connect to NetOutput -> connecting_output_atomic_nodes
/// A node that is both atomic and reference is rejected (unsupported).
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
    map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
    map<string, vector<NodePtr>> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      // Per-clean-op accumulator; merged into the result map only at the end.
      map<string, vector<NodePtr>> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fail, is_atomic_node is false.
            (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fail, is_reference is false.
              (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, "
                                   "not support now", peer_in_node_desc->GetName().c_str());
                GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, "
                       "not support now", peer_in_node_desc->GetName().c_str());
                return ge::PARAM_INVALID;
              }
              // Batch label partitions the result maps (empty label = no dynamic batch).
              std::string batch_label;
              (void)ge::AttrUtils::GetStr(peer_in_node_desc, ATTR_NAME_BATCH_LABEL, batch_label);
              vector<int> is_connecting_output;
              // If GetBool fail, attr is_connecting_output is an empty vector.
              (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes[batch_label].emplace_back(peer_in_node);
                continue;
              }
              // A NetOutput-connected atomic node takes over: the normal list for
              // this batch label is discarded and the remaining peers are skipped.
              connecting_output_atomic_nodes[batch_label].emplace_back(peer_in_node);
              tmp_normal_atomic_nodes[batch_label].clear();
              break;
            }
          }
        }
      }
      // Merge non-empty per-label lists under this clean node.
      for (auto &it_atomic_node : tmp_normal_atomic_nodes) {
        if (!it_atomic_node.second.empty()) {
          normal_atomic_nodes_map[it_atomic_node.first][node] = it_atomic_node.second;
        }
      }
    }
  }
  return SUCCESS;
}
  855. Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
  856. vector<int64_t> &mem_offset_end) {
  857. auto node_op_desc = node->GetOpDesc();
  858. // Assign atomic node output memory
  859. Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  860. if (ret != SUCCESS) {
  861. GELOGE(ret, "[Assign][Memory:Ouput:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str());
  862. return ret;
  863. }
  864. // Check and assign atomic node workspace memory
  865. map<string, map<int64_t, int64_t>> atomic_workspace_info;
  866. atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  867. if (!atomic_workspace_info.empty()) {
  868. bool is_fusion_node = false;
  869. // If GetBool fail, is_fusion_node is false.
  870. (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
  871. if (is_fusion_node) {
  872. // Assign fusion atomic node workspace memory
  873. ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  874. } else {
  875. // Assign single ordinary atomic node workspace memory, not include fusion node
  876. ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  877. }
  878. if (ret != SUCCESS) {
  879. GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str());
  880. return ret;
  881. }
  882. } else {
  883. GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  884. }
  885. return SUCCESS;
  886. }
/// Assign atomic output/workspace memory for nodes whose outputs connect to
/// NetOutput. Each such node uses its own atomic_addr_clean op, so the clean
/// attr is set per node via SetIndependentAtomicAttr instead of on a shared
/// AtomicAddrClean op.
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      // Best-effort: a node without OpDesc is skipped, not treated as an error.
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }
    // Atomic memory start addr
    int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
             node->GetName().c_str());
      return FAILED;
    }
    // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) {
      GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
/// For every node carrying ATTR_NAME_REFERENCE, make each output whose name
/// matches an input name share that input producer's output offset (ref-by-name
/// semantics, e.g. HCOMBroadcast). Outputs without a name match keep their
/// already assigned offsets.
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fail, is_ref is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }
    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());
    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s",
                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
             out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      return ge::FAILED;
    }
    // Build input-name -> input-index lookup for the ref-by-name matching below.
    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        // NOTE(review): peer_out_anchor_index is used without a range check
        // against GetOutputOffset().size() here, unlike other accessors in this
        // file — confirm the producer's offsets are always fully populated.
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }
    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
  974. bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  975. for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
  976. auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
  977. if (peer_out_data_anchor == nullptr) {
  978. continue;
  979. }
  980. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  981. if (peer_op_desc == nullptr) {
  982. continue;
  983. }
  984. if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
  985. (peer_op_desc->GetType() == VARIABLE)) {
  986. REPORT_INNER_ERROR("E19999", "node(type:%s, name:%s) link to atomic node(name:%s), "
  987. "this situation not supported now",
  988. peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
  989. GELOGE(ge::FAILED, "[Check][Link]node(type:%s, name:%s) link to atomic node(name:%s), "
  990. "this situation not supported now",
  991. peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
  992. return false;
  993. }
  994. }
  995. return true;
  996. }
// Assigns HBM memory for every output of |node| listed in
// ATOMIC_ATTR_OUTPUT_INDEX. Each assigned section starts at the current HBM
// offset; the offset after each section (post-size, pre-alignment of the next)
// is appended to |mem_offset_end| so the caller can derive (start, size)
// pairs for the atomic-clean node later.
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> atomic_output_index;
  // If GetListInt fail, atomic_output_index is empty.
  (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  // Check atomic output: every atomic index must map into the offset list.
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    std::string error =
        "Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) +
        " of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  // Atomic outputs are always placed in the HBM pool.
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      std::string error =
          "Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) +
          " is more than the size:" + FmtToStr(output_list_size) + " of output_list.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld",
             node->GetName().c_str(), output_index);
      return ge::FAILED;
    }
    // If you have already assigned an atomic address, skip it, and you don't need to reassign it.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }
    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    // Best-effort: a missing size leaves size == 0 and the offset unchanged.
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
    }
    output_list[output_index] = iter->second.mem_offset_;
    std::string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%u] "
           "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(),
           node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM,
           size, size, batch_label.c_str());
    // Advance the global HBM cursor past this output, then realign it.
    iter->second.mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
    mem_offset_end.emplace_back(iter->second.mem_offset_);
  }
  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
  1065. Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
  1066. bool &is_mem_assigned) {
  1067. if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
  1068. std::string error =
  1069. "Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) +
  1070. " is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors.";
  1071. GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
  1072. return ge::PARAM_INVALID;
  1073. }
  1074. auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  1075. GE_CHECK_NOTNULL(out_data_anchor);
  1076. auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  1077. for (auto &input_anchor : input_anchors) {
  1078. auto output_node = input_anchor->GetOwnerNode();
  1079. /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address
  1080. /// has been assigned
  1081. vector<int64_t> atomic_input_index;
  1082. (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
  1083. if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
  1084. is_mem_assigned = true;
  1085. break;
  1086. }
  1087. }
  1088. return SUCCESS;
  1089. }
// Assigns HBM memory for the atomic workspaces of a non-fusion node.
// |workspace_info| maps this node's name to {workspace index -> size}; each
// assigned workspace advances the global HBM offset and its section end is
// appended to |mem_offset_end|. The updated workspace offsets are written
// back onto the op.
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    // For an ordinary (non-fusion) node the info must be keyed by its own name.
    if (op_desc->GetName() != iter->first) {
      std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
          " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }
    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        std::string error = "The workspace index:" + FmtToStr(workspace_index) +
            " is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" +
            op_desc->GetName().c_str();
        GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
        return ge::PARAM_INVALID;
      }
      // Place this workspace at the current HBM cursor.
      workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%u] size[%ld] real_size[%ld] batch[%s].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size,
          batch_label.c_str());
      // Advance and realign the cursor, then record the section end.
      mem_type_iter->second.mem_offset_ += workspace_size;
      AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
  1140. Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
  1141. map<string, map<int64_t, int64_t>> &workspace_info,
  1142. vector<int64_t> &mem_offset_end) {
  1143. GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  1144. auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  1145. if (mem_type_iter == memory_offset_.end()) {
  1146. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
  1147. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1148. GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
  1149. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1150. return FAILED;
  1151. }
  1152. map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  1153. for (auto &iter : workspace_info) {
  1154. if (iter.second.empty()) {
  1155. continue;
  1156. }
  1157. map<int64_t, int64_t> index_offset;
  1158. for (auto &info_iter : iter.second) {
  1159. auto workspace_index = static_cast<uint64_t>(info_iter.first);
  1160. auto workspace_size = info_iter.second;
  1161. size_t workspace_offset = mem_type_iter->second.mem_offset_;
  1162. std::string batch_label;
  1163. (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
  1164. GELOGI(
  1165. "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
  1166. "memtype[%u] ssize[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(),
  1167. op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_,
  1168. op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str());
  1169. mem_type_iter->second.mem_offset_ += workspace_size;
  1170. AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
  1171. mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
  1172. index_offset.insert(std::make_pair(workspace_index, workspace_offset));
  1173. }
  1174. sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  1175. }
  1176. if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
  1177. REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s",
  1178. EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
  1179. GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.",
  1180. EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
  1181. return FAILED;
  1182. }
  1183. return SUCCESS;
  1184. }
  1185. Status GraphMemoryAssigner::CheckOffset() {
  1186. std::map<std::string, std::string> anchor_to_symbol;
  1187. std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  1188. if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
  1189. REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str());
  1190. GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str());
  1191. return FAILED;
  1192. }
  1193. for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
  1194. GE_CHECK_NOTNULL(node->GetOpDesc());
  1195. vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
  1196. for (auto input : input_list) {
  1197. if (input == ge::kInvalidOffset) {
  1198. std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
  1199. + " in node" + FmtToStr(node->GetName());
  1200. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1201. return FAILED;
  1202. }
  1203. }
  1204. bool need_update_output = false;
  1205. vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
  1206. for (uint32_t i = 0; i < output_list.size(); ++i) {
  1207. if (output_list[i] == ge::kInvalidOffset) {
  1208. std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
  1209. + " in node" + FmtToStr(node->GetName());
  1210. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1211. return FAILED;
  1212. }
  1213. if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
  1214. auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
  1215. if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
  1216. output_list[i] = symbol_offset;
  1217. need_update_output = true;
  1218. }
  1219. }
  1220. }
  1221. if (need_update_output) {
  1222. node->GetOpDesc()->SetOutputOffset(output_list);
  1223. }
  1224. vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
  1225. for (auto workspace : workspace_list) {
  1226. if (workspace == ge::kInvalidOffset) {
  1227. std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) +
  1228. + " in node" + FmtToStr(node->GetName());
  1229. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1230. return FAILED;
  1231. }
  1232. }
  1233. // check reuse input and output
  1234. GE_CHK_STATUS_RET(CheckRefNodeOffset(node), "[Check][Offset]fail for node: %s", node->GetName().c_str());
  1235. }
  1236. return SUCCESS;
  1237. }
  1238. ge::Status GraphMemoryAssigner::CheckRefNodeOffset(const NodePtr &node) {
  1239. GE_CHECK_NOTNULL(node);
  1240. std::map<int32_t, int32_t> out2ins;
  1241. GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str());
  1242. auto opdesc = node->GetOpDesc();
  1243. GE_CHECK_NOTNULL(opdesc);
  1244. auto output_list = opdesc->GetOutputOffset();
  1245. auto input_list = opdesc->GetInputOffset();
  1246. for (const auto &out2in : out2ins) {
  1247. auto out_i = out2in.first;
  1248. if (static_cast<size_t>(out_i) >= output_list.size()) {
  1249. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" +
  1250. FmtToStr(output_list.size()) + "should bigger than ref out index" + FmtToStr(out_i);
  1251. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1252. return ge::FAILED;
  1253. }
  1254. auto in_i = out2in.second;
  1255. if (static_cast<size_t>(in_i) >= input_list.size()) {
  1256. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset size" +
  1257. FmtToStr(input_list.size()) + "should bigger than ref input index" + FmtToStr(in_i);
  1258. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1259. return ge::FAILED;
  1260. }
  1261. if (output_list[out_i] != input_list[in_i]) {
  1262. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset " + FmtToStr(input_list[in_i]) +
  1263. "should equal to output offset" + FmtToStr(output_list[out_i]) + "with ref in" +
  1264. FmtToStr(in_i) + "to output" + FmtToStr(out_i);
  1265. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1266. return ge::FAILED;
  1267. }
  1268. }
  1269. return ge::SUCCESS;
  1270. }
// Logs the final per-pool memory totals and refreshes every node's input
// offsets from its producers' output offsets.
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s",
                       compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    // NOTE(review): the error is reported but execution still falls through
    // without returning FAILED — confirm whether that is intentional.
  }
  // Emit the final assigned size of each memory pool as an event log.
  for (auto pair : memory_offset_) {
    GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
            pair.second.mem_offset_, pair.first);
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str());
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
  1290. NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  1291. if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
  1292. return node;
  1293. }
  1294. if (NodeUtils::IsDynamicShape(node)) {
  1295. return node;
  1296. }
  1297. return NodeUtils::GetParentInput(node);
  1298. }
  1299. ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  1300. uint32_t parent_index = 0;
  1301. if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
  1302. return SUCCESS;
  1303. }
  1304. // Subgraph Data Node, check for constant input.
  1305. std::string op_type;
  1306. const auto &in_node = NodeUtils::GetParentInput(node);
  1307. if (NodeUtils::GetConstOpType(in_node, op_type)) {
  1308. input_list = in_node->GetOpDesc()->GetOutputOffset();
  1309. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output.
  1310. return SUCCESS; // Constant input.
  1311. }
  1312. // Memory allocated for dynamic shape subgraph Data.
  1313. if (NodeUtils::IsDynamicShape(node)) {
  1314. return SUCCESS;
  1315. }
  1316. const auto &owner = node->GetOwnerComputeGraph();
  1317. const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  1318. const auto parent_inputs = parent_desc->GetInputOffset();
  1319. if (parent_inputs.size() <= parent_index) {
  1320. std::string error = "Get Parent input offset failed, node is " + FmtToStr(node->GetName()) +
  1321. + ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
  1322. FmtToStr(parent_index);
  1323. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1324. return FAILED;
  1325. }
  1326. input_list = {parent_inputs[parent_index]};
  1327. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input.
  1328. return SUCCESS;
  1329. }
// Rebuilds |input_list| for |node| from its producers' output offsets.
// Special cases handled per input:
//  - nodes with ATTR_NAME_INPUT_MEM_TYPE_LIST: L1 inputs keep their original
//    offset; other inputs add the original offset to the producer's offset;
//  - a constant producer: the offset is read from the input tensor descriptor;
//  - ref outputs reusing this input are updated to follow the new offset.
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  // Counts only connected inputs; indexes into origin_input_list/memory_type.
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  std::map<int32_t, int32_t> out2ins;
  GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str());
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;  // Unconnected input: contributes no offset.
    }
    // If the current node not broadcast, the OutputOffset of the previous node is used to update the input_list
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      bool is_l1_type = false;
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr && !origin_input_list.empty()) {
        // The three per-input lists must stay in lock-step.
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          std::string error = "Node" + FmtToStr(tmp_op_desc->GetName()) +
              + " input_size" + FmtToStr(input_size) + " diff from memory_type_size" +
              FmtToStr(mem_type_size) + " from ori_input_offset_list_size" +
              FmtToStr(ori_input_offset_list_size);
          GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
          return ge::FAILED;
        }
        GELOGD("Node[%s] input[%d] has origin offset[%ld]", tmp_op_desc->GetName().c_str(), anchor->GetIdx(),
               origin_input_list[valid_input_index]);
        // L1 keep original input_offset
        is_l1_type = (memory_type[valid_input_index] == RT_MEMORY_L1);
        if (is_l1_type) {
          input_offset = origin_input_list[valid_input_index];
        } else {
          // hbm input_offset = original input_offset + output_offset
          input_offset = origin_input_list[valid_input_index] + output_list.at(out_index);
        }
      }
      // Constant producers carry their offset in the tensor descriptor.
      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }
      if (!is_l1_type) {
        // update ref output_offset when input change
        GE_CHK_STATUS_RET(UpdateRefOpOutputOffset(node, out2ins, anchor->GetIdx(), input_offset),
                          "[Update][RefOffset]fail for node: %s", node->GetName().c_str());
      }
      GELOGD("Node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", tmp_op_desc->GetName().c_str(),
             anchor->GetIdx(), peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index,
             input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}
  1396. ge::Status GraphMemoryAssigner::UpdateRefOpOutputOffset(const NodePtr &node, const std::map<int32_t, int32_t> &out2ins,
  1397. const int ref_in, const int64_t input_offset) const {
  1398. auto opdesc = node->GetOpDesc();
  1399. GE_CHECK_NOTNULL(opdesc);
  1400. for (const auto &out2in : out2ins) {
  1401. auto out_i = out2in.first;
  1402. auto in_i = out2in.second;
  1403. if (in_i == ref_in) {
  1404. auto origin_output_list = opdesc->GetOutputOffset();
  1405. if (static_cast<size_t>(out_i) >= origin_output_list.size()) {
  1406. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" +
  1407. FmtToStr(origin_output_list.size()) + "should bigger than ref out index" + FmtToStr(out_i);
  1408. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1409. return ge::FAILED;
  1410. }
  1411. origin_output_list[out_i] = input_offset;
  1412. opdesc->SetOutputOffset(origin_output_list);
  1413. GELOGI("Node[%s] output[%d] is updated from reuse input index[%d] to offset[%ld]", opdesc->GetName().c_str(),
  1414. out_i, ref_in, input_offset);
  1415. }
  1416. }
  1417. return ge::SUCCESS;
  1418. }
// Dispatches input-offset refresh by node type: broadcast nodes mix variable
// input offsets with producer output offsets, subgraph Data nodes may inherit
// from a parent const or parent input, and all other nodes delegate to the
// two-argument UpdateOpInputOffset overload.
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;  // Unconnected input: contributes no offset.
      }
      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is broadcast and the preceding node is variable, because InputOffset has been set
      // in function:AssignVarAttr2Nodes, then the InputOffset of the broadcast node is taken to update the input_list.
      // Otherwise, the OutputOffset of the previous node is used to update the input_list.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    // Subgraph Data: offsets may come from a parent const or parent input.
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s ", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}
  1464. Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
  1465. const vector<int64_t> &mem_offset_end, int64_t memory_type) {
  1466. GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  1467. // Parsing offset and size vectors
  1468. vector<int64_t> memory_offset_start;
  1469. vector<int64_t> memory_offset_size;
  1470. memory_offset_start.emplace_back(atomic_mem_start);
  1471. for (size_t i = 0; i < mem_offset_end.size(); ++i) {
  1472. memory_offset_start.emplace_back(mem_offset_end[i]);
  1473. // Number 1 means element index
  1474. auto size = memory_offset_start[i + 1] - memory_offset_start[i];
  1475. memory_offset_size.emplace_back(size);
  1476. }
  1477. memory_offset_start.pop_back();
  1478. const auto &in_control_anchor = node->GetInControlAnchor();
  1479. if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
  1480. for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
  1481. if (peer_out_control_anchor == nullptr) {
  1482. continue;
  1483. }
  1484. auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
  1485. auto peer_out_node_desc = peer_out_node->GetOpDesc();
  1486. if (peer_out_node_desc == nullptr) {
  1487. continue;
  1488. }
  1489. GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(),
  1490. peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
  1491. if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
  1492. if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) {
  1493. GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
  1494. return FAILED;
  1495. }
  1496. }
  1497. }
  1498. }
  1499. return SUCCESS;
  1500. }
// Attaches the atomic sections (start/size pairs) to an ATOMICADDRCLEAN node:
// they are appended to its workspace/workspace-bytes lists and accumulated in
// the ATTR_NAME_AUTOMIC_ADD_START / ATTR_NAME_AUTOMIC_ADD_MEM_SIZE attributes.
// A node without an OpDesc is silently skipped (returns SUCCESS).
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size, int64_t memory_type) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    // Append the sections to the node's workspaces so the runtime clears them.
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
    // Accumulate into the existing attribute rather than overwriting it.
    std::vector<int64_t> mem_start_vector;
    // If GetListInt fail, mem_start_vector is empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);
    std::vector<int64_t> mem_size_vector;
    // If GetListInt fail, mem_size_vector is empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);
    // Render the start/size lists as space-separated strings for the IMAS log.
    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();
    GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]"
           " memtype[%ld] size[%s]",node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type,
           atomic_mem_size_str.c_str());
  }
  return SUCCESS;
}
  1550. void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  1551. if (mem_align_size <= 0) {
  1552. return;
  1553. }
  1554. auto iter = memory_offset_.find(memory_type);
  1555. if (iter == memory_offset_.end()) {
  1556. GELOGW("Memory offset don't have memory type[%ld].", memory_type);
  1557. return;
  1558. }
  1559. iter->second.mem_offset_ =
  1560. (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
  1561. }
  1562. ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
  1563. int64_t &memory_type) {
  1564. memory_type = RT_MEMORY_HBM;
  1565. // In the dynamic batch scenario, the memory attributes of nodes are the same.
  1566. for (auto &n : nodes) {
  1567. if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
  1568. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"),
  1569. "[Get][MemType:input]fail for node:%s", n->GetName().c_str())
  1570. break;
  1571. }
  1572. if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
  1573. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"),
  1574. "[Get][MemType:output]fail for node:%s", n->GetName().c_str())
  1575. break;
  1576. }
  1577. }
  1578. return SUCCESS;
  1579. }
// Resolves the memory type of |node| on the requested side ("input" or
// "output"). Without a memory-type attribute the node stays on HBM (which
// must exist in memory_offset_); with one, all entries must match one entry
// per in-data anchor and be a single consistent, known type.
ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  memory_type = RT_MEMORY_HBM;
  vector<int64_t> mem_type_list;
  if (input_or_output == "input") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (input_or_output == "output") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  }
  // No attribute: default HBM, but it must be a known pool.
  if (mem_type_list.empty()) {
    if (memory_offset_.find(memory_type) == memory_offset_.end()) {
      std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
          + ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    return SUCCESS;
  }
  // The attribute must provide one type per in-data anchor.
  if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
    std::string error = "The size" + FmtToStr(mem_type_list.size()) +
        " of mem type list is not equal to the size of in data anchor" +
        FmtToStr(node->GetAllInDataAnchorsSize()) + ", opname is " +
        FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  if (!CheckContinuousMemType(mem_type_list)) {
    GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str());
    return FAILED;
  }
  // It is continuous memory and memory type is the same, so use the first memory.
  memory_type = mem_type_list[0];
  return SUCCESS;
}
  1614. bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  1615. if (mem_type_list.size() == 0) {
  1616. return true;
  1617. }
  1618. int64_t mem_type_tmp = mem_type_list[0];
  1619. for (auto mem_type : mem_type_list) {
  1620. if (mem_type != mem_type_tmp) {
  1621. std::string error = "The memory is continuous, but the type of the input memory is inconsistent. They are " +
  1622. FmtToStr(mem_type_tmp) + " and " + FmtToStr(mem_type);
  1623. ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
  1624. GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].",
  1625. mem_type_tmp, mem_type);
  1626. return false;
  1627. }
  1628. }
  1629. if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
  1630. std::string error = "Memory offset map does not have memory type" + FmtToStr(mem_type_tmp);
  1631. ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
  1632. GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
  1633. return false;
  1634. }
  1635. return true;
  1636. }
  1637. void GraphMemoryAssigner::PrintMemoryOffset() {
  1638. for (auto pair : memory_offset_) {
  1639. // Assign memory of max batch nodes that have the same batch label.
  1640. GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
  1641. pair.first, pair.second.mem_offset_);
  1642. }
  1643. }
// Collects the output-index -> input-index memory reuse relations declared on a node
// into out2ins. Returns FAILED only when a declared reuse index has no matching
// in-data anchor; otherwise SUCCESS (possibly with an empty map).
ge::Status GraphMemoryAssigner::TryGetNodeRefIndexes(const NodePtr &node, map<int32_t, int32_t> &out2ins) const{
  // data and netoutput no need check because only data's output or netoutput's input is used
  if (node->GetType() == DATA || node->GetType() == NETOUTPUT) {
    return ge::SUCCESS;
  }
  for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    int32_t reuse_in_index = -1;
    // nopadding means output[0] reuse input[0], but as history reason,
    // other output index also return true for mem assign in block_mem_assigner
    if (GraphUtils::IsNoPaddingRefFromInput(out_data_anchor, reuse_in_index)) {
      // Record only this one pair and stop scanning further anchors (see note above
      // about historical behavior for other output indexes).
      out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
      return ge::SUCCESS;
    }
    bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index);
    if (reuse_input_flag) {
      if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
        out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
      } else {
        // The attr points at an input anchor that does not exist on this node.
        REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, "
                           "please check attr reuse_input",
                           reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, "
               "please check attr reuse_input",
               reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        return FAILED;
      }
    }
  }
  return ge::SUCCESS;
}
  1674. bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
  1675. const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
  1676. for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
  1677. if (in_node->GetType() == VARIABLE) {
  1678. GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(),
  1679. in_node->GetName().c_str());
  1680. return true;
  1681. }
  1682. auto iter = node_2_continuous_type.find(in_node);
  1683. // In node's topo order in the front, so function can not be exception
  1684. auto continuous_type = iter->second;
  1685. bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
  1686. if (continuous_input) {
  1687. GELOGI("[Store][Node] of %s cause it's precursor node %s need assign continuous input memory",
  1688. input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
  1689. return false;
  1690. }
  1691. }
  1692. for (const auto &out_node : input_continuous_node->GetOutDataNodes()) {
  1693. auto continuous_type = GetContinuousMemoryType(out_node->GetOpDesc());
  1694. node_2_continuous_type.emplace(out_node, continuous_type);
  1695. bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
  1696. if (continuous_input) {
  1697. GELOGI("[Store][Node] of %s cause it's succeed node %s need assign continuous input memory",
  1698. input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
  1699. return false;
  1700. }
  1701. }
  1702. return true;
  1703. }
// Assigns continuous input memory for input_continuous_node and, when the node marks
// all of its inputs as atomic (ATOMIC_ATTR_INPUT_INDEX == kAllInputAddrIsAtomic),
// propagates the assigned [start, size] range to any ATOMICADDRCLEAN node connected
// via an in-control edge so the range gets cleaned before use.
// @param input_continuous_node node requiring continuous input memory
// @param continuous_type       bitmask describing the kind of continuity required
// @param reverse_refresh       forwarded to AssignContinuousInputMemory
// @return SUCCESS, or the error status of the failing helper
ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
                                                                             uint32_t continuous_type,
                                                                             bool reverse_refresh) {
  // Filled by AssignContinuousInputMemory with the range that may need atomic clean.
  int64_t mem_clean_start = 0;
  int64_t mem_clean_size = 0;
  int64_t memory_type = RT_MEMORY_HBM;
  GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"),
                    "[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str());
  auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
                                         continuous_type, reverse_refresh);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str());
    return ret;
  }
  // Clean up atomic address, eg, hcom node
  vector<int32_t> input_indexes;
  // If GetListInt fail, input_indexes is empty.
  (void)ge::AttrUtils::GetListInt(input_continuous_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
  if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
    // check whether there is an atomic conflict between the current node and the peer out node
    if (!CheckInputIsSupportAtomic(input_continuous_node)) {
      return ge::FAILED;
    }
    const auto &in_control_anchor = input_continuous_node->GetInControlAnchor();
    GE_CHECK_NOTNULL(in_control_anchor);
    // Hand the cleaned range to every ATOMICADDRCLEAN producer on the control edges.
    for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      GE_CHECK_NOTNULL(peer_out_control_anchor);
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
        ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
        if (ret != SUCCESS) {
          GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
          return ret;
        }
      }
    }
  }
  return ge::SUCCESS;
}
  1743. Status GraphMemoryAssigner::AssignBufferPoolMemory() {
  1744. auto is_buffer_pool_mem_enable = [] (const ComputeGraphPtr &graph) -> bool {
  1745. for (NodePtr &node : graph->GetAllNodes()) {
  1746. auto op_desc = node->GetOpDesc();
  1747. if (op_desc == nullptr) {
  1748. continue;
  1749. }
  1750. bool has_attrs = op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_ID) && op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_SIZE);
  1751. if (has_attrs) {
  1752. return true;
  1753. }
  1754. }
  1755. return false;
  1756. };
  1757. auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
  1758. GE_CHECK_NOTNULL(root_graph);
  1759. if (root_graph->GetGraphUnknownFlag()) {
  1760. GELOGI("[Check][Enable]Unknown root graph does not support buffer pool memory, graph:%s.",
  1761. compute_graph_->GetName().c_str());
  1762. return SUCCESS;
  1763. }
  1764. if (!is_buffer_pool_mem_enable(compute_graph_)) {
  1765. GELOGD("[Check][Enable]Buffer pool memory is not enable, graph:%s.", compute_graph_->GetName().c_str());
  1766. return SUCCESS;
  1767. }
  1768. map<int64_t, size_t> mem_type_to_offset;
  1769. for (const auto &pair : memory_offset_) {
  1770. mem_type_to_offset[pair.first] = pair.second.mem_offset_;
  1771. }
  1772. BufferPoolMemAssigner buffer_pool_mem_assigner(compute_graph_, mem_type_to_offset);
  1773. Status status = buffer_pool_mem_assigner.Assign();
  1774. if (status != SUCCESS) {
  1775. GELOGE(status, "[Assign][BufferPoolMem]Graph:%s.", compute_graph_->GetName().c_str());
  1776. REPORT_INNER_ERROR("E19999", "Failed to assign buffer pool memory, graph:%s.", compute_graph_->GetName().c_str());
  1777. return status;
  1778. }
  1779. int64_t mem_type = buffer_pool_mem_assigner.GetMemType();
  1780. auto iter = memory_offset_.find(mem_type);
  1781. if (iter == memory_offset_.end()) {
  1782. GELOGE(FAILED, "[Check][MemType]Memory type is not supported, graph:%s, mem type:%ld.",
  1783. compute_graph_->GetName().c_str(), mem_type);
  1784. REPORT_INNER_ERROR("E19999", "Memory type is not supported, graph:%s, mem type:%ld.",
  1785. compute_graph_->GetName().c_str(), mem_type);
  1786. return FAILED;
  1787. }
  1788. iter->second.mem_offset_ = buffer_pool_mem_assigner.GetMemOffset();
  1789. GELOGI("[Assign][BufferPoolMem]Assign buffer pool memory successfully, graph:%s, mem type:%ld, mem offset:%zu.",
  1790. compute_graph_->GetName().c_str(), mem_type, buffer_pool_mem_assigner.GetMemOffset());
  1791. return SUCCESS;
  1792. }
  1793. // if producer and customers in the same stream, or customers on the same stream when producer not assign a stream,
  1794. // then return false.
  1795. bool GraphMemoryAssigner::IsOutputVisitedByMultiStream(const NodePtr &peer_out_node, int64_t out_anchor_index) {
  1796. GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, return true);
  1797. int64_t unique_stream_id = peer_out_node->GetOpDesc()->GetStreamId();
  1798. GE_IF_BOOL_EXEC(peer_out_node->GetOutDataAnchor(out_anchor_index) == nullptr, return true);
  1799. for (const auto &in_data_anchor : peer_out_node->GetOutDataAnchor(out_anchor_index)->GetPeerInDataAnchors()) {
  1800. auto node = in_data_anchor->GetOwnerNode();
  1801. GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, continue);
  1802. if (node->GetOpDesc()->GetStreamId() == kInvalidStream) {
  1803. continue;
  1804. }
  1805. if (unique_stream_id == kInvalidStream) { // peer_out_node not belong to any stream
  1806. unique_stream_id = node->GetOpDesc()->GetStreamId();
  1807. continue;
  1808. }
  1809. if (node->GetOpDesc()->GetStreamId() != unique_stream_id) {
  1810. return true;
  1811. }
  1812. }
  1813. return false;
  1814. }
// Writes `distance` into slot [1] (the "next visit" distance) of the
// ATTR_NAME_DATA_VISIT_DISTANCE list on each of prev_node's inputs listed in
// prev_node_input_index_vec. Best-effort: every failure is logged and skipped.
void GraphMemoryAssigner::UpdatePrevNodeInputDesc(const NodePtr &prev_node,
                                                  const vector<int64_t> &prev_node_input_index_vec,
                                                  int64_t distance) {
  GE_IF_BOOL_EXEC(prev_node == nullptr, return);
  auto prev_node_op_desc = prev_node->GetOpDesc();
  GE_IF_BOOL_EXEC(prev_node_op_desc == nullptr, return);
  for (const auto prev_node_input_index : prev_node_input_index_vec) {
    auto input_desc = prev_node_op_desc->GetInputDesc(prev_node_input_index);
    vector<int64_t> prev_next_distances;
    // The attr must already exist (set by UpdateCurNodeInputDesc on a prior visit).
    if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
      GELOGW("Get [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed",
             prev_node_op_desc->GetName().c_str(),
             prev_node_input_index);
      continue;
    }
    // Layout is {prev_distance, next_distance}; only the next distance is updated here.
    if (prev_next_distances.size() == kPrevNextDistanceNum) {
      prev_next_distances[1] = distance;
    } else {
      GELOGW("Size of prev_next_distances is not %d.", kPrevNextDistanceNum);
      continue;
    }
    if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
      GELOGW("Set [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
             prev_node_op_desc->GetName().c_str(),
             prev_node_input_index);
      continue;
    }
    // GetInputDesc returned a copy, so the modified desc must be written back.
    if (prev_node_op_desc->UpdateInputDesc(prev_node_input_index, input_desc) != GRAPH_SUCCESS) {
      GELOGW("Update [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
             prev_node_op_desc->GetName().c_str(),
             prev_node_input_index);
      continue;
    }
    GELOGD("Set the next distance[%ld] to node[%s], input index[%ld]",
           distance,
           prev_node->GetName().c_str(),
           prev_node_input_index);
  }
  return;
}
  1855. void GraphMemoryAssigner::UpdateCurNodeInputDesc(const NodePtr &cur_node,
  1856. int64_t cur_node_input_index,
  1857. int64_t distance) {
  1858. GE_IF_BOOL_EXEC(cur_node == nullptr, return);
  1859. GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, return);
  1860. auto input_desc = cur_node->GetOpDesc()->GetInputDesc(cur_node_input_index);
  1861. vector<int64_t> prev_next_distances{distance, -1};
  1862. if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
  1863. GELOGW("Set [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
  1864. cur_node->GetOpDesc()->GetName().c_str(),
  1865. cur_node_input_index);
  1866. return;
  1867. }
  1868. if (cur_node->GetOpDesc()->UpdateInputDesc(cur_node_input_index, input_desc) != GRAPH_SUCCESS) {
  1869. GELOGW("Update [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
  1870. cur_node->GetOpDesc()->GetName().c_str(),
  1871. cur_node_input_index);
  1872. return;
  1873. }
  1874. GELOGD("Set the prev distance[%ld] to node[%s], input index[%ld]",
  1875. distance,
  1876. cur_node->GetName().c_str(),
  1877. cur_node_input_index);
  1878. return;
  1879. }
  1880. void GraphMemoryAssigner::CheckNeedCalcDistAndUpdateVisitInfo(
  1881. const NodePtr &peer_out_node,
  1882. const OutDataAnchorPtr &peer_out_anchor,
  1883. size_t matched_mem_offset,
  1884. map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
  1885. bool &is_need_calc_distance) {
  1886. auto iter = mem_block_visit_info.find(matched_mem_offset);
  1887. // cannot find visit info, peer_out_node must be a producer and this data is the first time to be visited.
  1888. if (iter == mem_block_visit_info.end()) {
  1889. if (IsOutputVisitedByMultiStream(peer_out_node, peer_out_anchor->GetIdx())) {
  1890. vector<int64_t> temp;
  1891. mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(nullptr, temp)));
  1892. is_need_calc_distance = false;
  1893. return;
  1894. } else {
  1895. vector<int64_t> temp = {-1};
  1896. // producer's prev_node_index set to -1 as default
  1897. mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(peer_out_node, temp)));
  1898. is_need_calc_distance = true;
  1899. return;
  1900. }
  1901. } else {
  1902. if (mem_block_visit_info[matched_mem_offset].first == nullptr) {
  1903. // multi-stream visit, no need to calculate
  1904. is_need_calc_distance = false;
  1905. return;
  1906. }
  1907. if (peer_out_node->GetOpDesc()->GetStreamId() !=
  1908. mem_block_visit_info[matched_mem_offset].first->GetOpDesc()->GetStreamId()) {
  1909. // cur node and peer_out_node not in the same stream, no need to calculate
  1910. is_need_calc_distance = false;
  1911. return;
  1912. }
  1913. }
  1914. is_need_calc_distance = true;
  1915. return;
  1916. }
// calculate distance, update visit info, update prev_node input desc, update cur node input desc
// `distance` is the number of intervening nodes (same stream, topo order) between the
// previous visitor of this memory block and `node`. is_need_skip is set when the
// bookkeeping cannot proceed and the caller should move on to the next anchor.
void GraphMemoryAssigner::CalcDistanceAndUpdateDesc(const map<string, int64_t> &node_index_in_stream,
                                                    const InDataAnchorPtr &in_data_anchor,
                                                    size_t matched_mem_offset,
                                                    NodePtr &node,
                                                    map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
                                                    bool &is_need_skip) {
  int64_t distance = -1;
  auto prev_node = mem_block_visit_info[matched_mem_offset].first;
  auto prev_node_input_index_vec = mem_block_visit_info[matched_mem_offset].second;
  GE_IF_BOOL_EXEC(prev_node == nullptr, is_need_skip = true; return);
  if (prev_node_input_index_vec.size() == 1 && prev_node_input_index_vec[0] == -1) {
    // prev_node is producer and the data is just be produced(not visited by other node)
    GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return);
    if (prev_node->GetOpDesc()->GetStreamId() == -1) { // producer not assigned a stream
      distance = 0;
    } else {
      auto iter = node_index_in_stream.find(prev_node->GetName());
      if (iter == node_index_in_stream.end()) {
        // Producer has no recorded topo index (e.g. ge-local op): treat as adjacent.
        distance = 0;
      } else {
        // Nodes strictly between producer and consumer in the same stream.
        distance = node_index_in_stream.at(node->GetName()) - iter->second - 1;
      }
    }
    // Current node becomes the block's latest visitor; record which input used it.
    mem_block_visit_info[matched_mem_offset].first = node;
    mem_block_visit_info[matched_mem_offset].second.clear();
    mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
  } else { // the data is visit by other customer just before.
    if (prev_node_input_index_vec.empty()) {
      GELOGW("Missing prev node[%s] input index.", prev_node->GetName().c_str());
      is_need_skip = true;
      return;
    }
    if (prev_node == node) { // scene: multiple anchors of a node access the same data
      vector<int64_t> prev_next_distances;
      GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return);
      auto input_desc = prev_node->GetOpDesc()->GetInputDesc(prev_node_input_index_vec[0]);
      if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
        GELOGW("Get ATTR_NAME_DATA_VISIT_DISTANCE failed.");
        is_need_skip = true;
        return;
      }
      if (prev_next_distances.size() != kPrevNextDistanceNum) {
        GELOGW("Size of prev_next_distance is not %d.", kPrevNextDistanceNum);
        is_need_skip = true;
        return;
      } else {
        distance = prev_next_distances[0]; // use the same prev_distance as previous anchor
      }
      // Same node: only append the extra input index, keep the visitor unchanged.
      mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
    } else {
      distance = node_index_in_stream.at(node->GetName()) - node_index_in_stream.at(prev_node->GetName()) - 1;
      // Close out the previous visitor's "next distance" before taking over the record.
      UpdatePrevNodeInputDesc(prev_node, prev_node_input_index_vec, distance);
      mem_block_visit_info[matched_mem_offset].first = node;
      mem_block_visit_info[matched_mem_offset].second.clear();
      mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
    }
  }
  UpdateCurNodeInputDesc(node, in_data_anchor->GetIdx(), distance);
}
  1977. void GraphMemoryAssigner::DeleteVisitInfoWhenLifecycleEnded(
  1978. const NodePtr &node,
  1979. const InDataAnchorPtr &in_data_anchor,
  1980. size_t matched_mem_offset,
  1981. map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info) {
  1982. GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, return);
  1983. auto input_desc = node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx());
  1984. bool is_end_of_inputmem_lifecycle = false;
  1985. // if is_end_of_inputmem_lifecycle is true, indicating that cur node is the last customer of this data,
  1986. // then we need to delete the visit info of the block in case that the memblock be reused and visited.
  1987. if (ge::AttrUtils::GetBool(input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, is_end_of_inputmem_lifecycle) &&
  1988. is_end_of_inputmem_lifecycle) {
  1989. GELOGD("ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE is true, node name is [%s], in_data_anchor index is [%d]",
  1990. node->GetName().c_str(),
  1991. in_data_anchor->GetIdx());
  1992. auto iter = mem_block_visit_info.find(matched_mem_offset);
  1993. if (iter != mem_block_visit_info.end()) {
  1994. mem_block_visit_info.erase(iter);
  1995. }
  1996. }
  1997. }
  1998. void GraphMemoryAssigner::MarkNodeDistanceAttr(const ComputeGraphPtr &compute_graph,
  1999. NodePtr &node,
  2000. map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
  2001. const map<string, int64_t> &node_index_in_stream) {
  2002. GELOGD("Begin to mark node distance attr, node name is [%s]", node->GetName().c_str());
  2003. GE_IF_BOOL_EXEC(node == nullptr, return);
  2004. for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
  2005. auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  2006. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  2007. auto peer_out_node = peer_out_anchor->GetOwnerNode();
  2008. GE_IF_BOOL_EXEC(peer_out_node == nullptr, continue);
  2009. GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, continue);
  2010. auto matched_mem_offset = peer_out_node->GetOpDesc()->GetOutputOffset().at(peer_out_anchor->GetIdx());
  2011. bool is_need_calc_distance = false;
  2012. CheckNeedCalcDistAndUpdateVisitInfo(peer_out_node, peer_out_anchor, matched_mem_offset,
  2013. mem_block_visit_info, is_need_calc_distance);
  2014. if (!is_need_calc_distance) {
  2015. continue;
  2016. }
  2017. bool is_need_skip = false;
  2018. CalcDistanceAndUpdateDesc(node_index_in_stream, in_data_anchor, matched_mem_offset, node,
  2019. mem_block_visit_info, is_need_skip);
  2020. if (is_need_skip) {
  2021. continue;
  2022. }
  2023. DeleteVisitInfoWhenLifecycleEnded(node, in_data_anchor, matched_mem_offset, mem_block_visit_info);
  2024. }
  2025. }
  2026. void GraphMemoryAssigner::MarkDistanceAttr() {
  2027. // key: mem_offset of the memory which we visited. value: node we visited and input index of this node
  2028. map<size_t, pair<NodePtr, vector<int64_t>>> mem_block_visit_info;
  2029. // key: node name, value: topo order of node in it's belonged stream(exclude ge_local_op)
  2030. map<string, int64_t> node_index_in_stream;
  2031. // key: stream id, value: cur nodes num in that stream
  2032. map<int64_t, int64_t> stream_nodes_num;
  2033. for (auto &node : compute_graph_->GetAllNodes()) {
  2034. auto node_op_desc = node->GetOpDesc();
  2035. GE_IF_BOOL_EXEC(node_op_desc == nullptr, return);
  2036. int64_t stream_id = node_op_desc->GetStreamId();
  2037. if (node_op_desc->GetOpKernelLibName() != kEngineNameGeLocal) {
  2038. if (stream_nodes_num.find(stream_id) == stream_nodes_num.end()) {
  2039. stream_nodes_num.insert(std::make_pair(stream_id, 1));
  2040. } else {
  2041. ++stream_nodes_num[stream_id];
  2042. }
  2043. node_index_in_stream.insert(std::make_pair(node->GetName(), stream_nodes_num[stream_id] - 1));
  2044. MarkNodeDistanceAttr(compute_graph_, node, mem_block_visit_info, node_index_in_stream);
  2045. } else {
  2046. GELOGD("node[%s] is ge_local_op, no need to calculate distance.", node->GetName().c_str());
  2047. }
  2048. }
  2049. }
  2050. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。