graph_mem_assigner.cc 86 kB

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "graph/build/memory/graph_mem_assigner.h"
#include <cstring>
#include <set>
#include "common/math/math_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/build/memory/buffer_pool_mem_assigner.h"
namespace {
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
// Each continuous type occupies its own bit, so the values must not overlap.
enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 };
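// Example: a node with both continuous input and no-padding continuous output carries
// continuous_type == (kTypeInput | kTypeOutputNoPadding), i.e. 1 | 8 == 9; callers below
// test individual bits with masks such as (continuous_type & kTypeInput) != 0.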
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
                              const ge::NodePtr &node, const uint32_t i) {
  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  if (iter1 == anchor_to_symbol.end()) {
    return ge::kInvalidOffset;
  }
  auto out_symbol = iter1->second;
  auto iter2 = symbol_to_anchors.find(out_symbol);
  if (iter2 == symbol_to_anchors.end()) {
    return ge::kInvalidOffset;
  }
  for (const auto &node_index_io : iter2->second) {
    if (node_index_io.value_ == out_symbol) {
      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
      if (node_index_io.index_ >= symbol_output_list.size()) {
        return ge::kInvalidOffset;
      }
      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
      return symbol_output_list.at(node_index_io.index_);
    }
  }
  return ge::kInvalidOffset;
}
}  // namespace
namespace ge {
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() {
  Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
  if (mem_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:HybridMemAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "New Object:HybridMemAssigner failed when assign graph memory, "
                      "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (mem_assigner->Assign() != ge::SUCCESS) {
    GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  if (mem_assigner->GetP2PMemOffset() >= 0) {
    MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
    memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  }
  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
                      "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign =
      ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
                      "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, "
                      "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                             int64_t dim_index, int64_t &output_mem_size,
                                             int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "[Get][TensorSize]");
    REPORT_INNER_ERROR("E19999", "Get tensor size failed when %s", __FUNCTION__);
    return FAILED;
  }
  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s",
                       dim_index, output_dims.size(), output_shape.ToString().c_str());
    GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s",
           dim_index, output_dims.size(), output_shape.ToString().c_str());
    return FAILED;
  }
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }
  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();
  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "[Calc][TensorSize]");
    return FAILED;
  }
  if (output_mem_size < 0) {
    REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. "
                       "shape:%s, format:%s, dtype:%s, maybe has dynamic shape",
                       output_mem_size, output_shape.ToString().c_str(),
                       TypeUtils::FormatToSerialString(out_format).c_str(),
                       TypeUtils::DataTypeToSerialString(data_type).c_str());
    GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, "
           "maybe has dynamic shape",
           output_mem_size, output_shape.ToString().c_str(),
           TypeUtils::FormatToSerialString(out_format).c_str(),
           TypeUtils::DataTypeToSerialString(data_type).c_str());
    return FAILED;
  }
  return SUCCESS;
}
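// Worked example for CalculateTensorRealSizeAndOutSize: for an output of shape [4, 2, 3] with
// dim_index == 1, the loop folds the leading dimension into batch_dim_num (batch_dim_num becomes 4)
// and the per-piece shape becomes [1, 2, 3]; output_mem_size is then the memory size of one such
// piece, while out_size stays the size of the complete tensor as reported by TensorUtils::GetSize.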
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
  if (memory_offset_.empty()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when ReAssignMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  GE_CHK_STATUS_RET(AssignBufferPoolMemory(), "AssignBufferPoolMemory Failed!");
  size_t total_mem_offset = 0;
  for (auto pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
    total_mem_offset += pair.second.mem_offset_;
  }
  auto session_id = compute_graph_->GetSessionID();
  if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, "
           "graph_id:%u, graph_name:%s, reducing the batch size or scaling down the model may solve the problem",
           total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(),
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    for (auto iter : mem_type_to_offset) {
      ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
                                                      {std::to_string(iter.first), std::to_string(iter.second),
                                                       "featuremap",
                                                       std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
      GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
              iter.second, iter.first);
    }
    return ge::FAILED;
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  if (priority_assigner == nullptr) {
    REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected when AssignZeroCopyMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  // Set offsets for zero-copy blocks.
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
    mem_offset[RT_MEMORY_HBM] += memory_block->Size();
    memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  }
  // Set offsets for zero-copy nodes.
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignZeroCopyMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
         zero_mem_copy_size);
  return SUCCESS;
}
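// Note on AssignZeroCopyMemory: zero-copy blocks are appended after the current HBM offset, and
// SetOpMemOffset(true) then refreshes the offsets of the zero-copy nodes only; zero_mem_copy_size
// is simply the delta between the HBM offset before and after this placement.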
uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
  if (op_desc == nullptr) {
    return 0;
  }
  bool is_continuous = false;
  uint32_t continuous_type = 0;
  // If GetBool fails, is_continuous stays false.
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeInput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeInputNoPadding;
      }
    }
  }
  is_continuous = false;
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeOutput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeOutputNoPadding;
      }
    }
  }
  if (continuous_type != 0) {
    GELOGI("[Get][MemType:Continuous]Current node %s, value is %u", op_desc->GetName().c_str(), continuous_type);
  }
  return continuous_type;
}
Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
                     int64_t &tensor_size, int64_t &nopadding_size) {
  if ((op_desc == nullptr) || (output_desc == nullptr)) {
    REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, "
                       "not expected when GetMemorySize");
    GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr");
    return FAILED;
  }
  tensor_size = 0;
  nopadding_size = 0;
  bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    int64_t attr_dim_index;
    bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
    if (!get_attr_dim_flag) {
      REPORT_INNER_ERROR("E19999", "Get Attr:%s failed when GetMemorySize, op_name:%s",
                         ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
      GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s",
             ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
      return FAILED;
    }
    // Calculate the real size of each piece of data and the size of the complete tensor.
    int64_t batch_dim_num = 1;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
        SUCCESS) {
      REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s",
                        attr_dim_index, op_desc->GetName().c_str());
      GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld",
             op_desc->GetName().c_str(), attr_dim_index);
      return FAILED;
    }
  } else {
    if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
      REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str());
      GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  if ((tensor_size < 0) || (nopadding_size < 0)) {
    REPORT_INNER_ERROR("E19999", "GetMemorySize fail, "
                       "tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
                       tensor_size, nopadding_size, op_desc->GetName().c_str());
    GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
           tensor_size, nopadding_size, op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
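// Note on GetMemorySize: in the no-padding case, nopadding_size receives the real size of one batch
// piece (from CalculateTensorRealSizeAndOutSize) and tensor_size the size of the complete tensor;
// in the padding case only tensor_size is filled, straight from TensorUtils::GetSize.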
void AlignMemOffset(int64_t &mem_align_size) {
  if (mem_align_size <= 0) {
    return;
  }
  mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
}
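// Example for AlignMemOffset, assuming MEM_ALIGN_SIZE == 512 as the hcom padding comments below
// suggest: an offset of 1 is rounded up to 512 and an offset of 513 to 1024; non-positive offsets
// are left untouched.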
bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) {
  bool is_peer_output_continuous = false;
  // If GetBool fails, is_peer_output_continuous stays false.
  (void)ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
  // Get the peer node's output count. If it is 1 (the peer node has only one output), the continuous input
  // of this node and the continuous output of the previous node coincide, which is supported. If it is not 1,
  // the two requirements may conflict, which is not supported.
  auto peer_output_size = peer_op_desc->GetOutputsSize();
  GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " requires continuous output. There may be conflict between the two. " +
                      "This node is not supported now.";
                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                  return true;);
  bool is_peer_reference = false;
  // If GetBool fails, is_peer_reference stays false.
  (void)AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
  GE_IF_BOOL_EXEC(is_peer_reference,
                  std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " is ref. There may be conflict between the two.";
                  GELOGW("%s", warning.c_str());
                  return false;);
  return false;
}
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  Status ret;
  // Nodes whose continuous input memory must be assigned later, in reverse topological order.
  std::vector<NodePtr> nodes_stack;
  std::map<NodePtr, uint32_t> node_2_continuous_type;
  // Traverse nodes.
  for (auto &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    uint32_t continuous_type;
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      continuous_type = GetContinuousMemoryType(node->GetOpDesc());
      node_2_continuous_type.emplace(node, continuous_type);
    } else {
      continuous_type = iter->second;
    }
    // Assign continuous input memory.
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
        GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
                          "[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str())
      } else {
        nodes_stack.push_back(node);
      }
    }
    // Assign continuous output memory.
    int64_t memory_type = RT_MEMORY_HBM;
    bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
    if (continuous_output) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"),
                        "[Get][MemType]fail for node:%s", node->GetName().c_str());
      ret = AssignContinuousOutputMemory(node, memory_type, continuous_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "[Assign][Memory:Continuous:Output]fail for node:%s", node->GetName().c_str());
        return ret;
      }
    }
  }
  // Assign continuous input memory for the nodes stored above, in reverse topological order.
  while (!nodes_stack.empty()) {
    auto node = nodes_stack.back();
    nodes_stack.pop_back();
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      REPORT_INNER_ERROR("E19999", "Inner data error when process continuous memory alloc for node:%s, "
                         "but has no continuous type", node->GetName().c_str());
      GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
    GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
                      "[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str())
  }
  for (auto pair : memory_offset_) {
    GELOGD("[Reassign][Memory:Continuous]At last, memory type = %ld, mem offset = %zu", pair.first,
           pair.second.mem_offset_);
  }
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
    int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
  GELOGI("[Assign][Memory:Input:Continuous]start for current node %s", node->GetName().c_str());
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
                       "when assign continuous input memory for node:%s", memory_type, node->GetName().c_str());
    GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
           memory_type, node->GetName().c_str());
    return FAILED;
  }
  // The head and tail of hcom continuous input must each be padded with MEM_ALIGN_SIZE (512) bytes.
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  continuous_mem_start = iter->second.mem_offset_;
  int64_t mem_offset = iter->second.mem_offset_;
  int64_t extra_memory_size = 0;
  bool is_continuous_input_allocated = false;
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  vector<int64_t> output_list_this = op_desc->GetOutputOffset();
  if (output_list_this.empty()) {
    REPORT_INNER_ERROR("E19999", "No output offset in node:%s, not expected when assign continuous input memory",
                       node->GetName().c_str());
    GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str());
    return FAILED;
  }
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;);
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    int64_t real_size = 0;
    std::vector<int64_t> offsets_of_fusion = {};
    bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion);
    lx_fusion = lx_fusion && !offsets_of_fusion.empty();
    if (lx_fusion) {
      if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) {
        std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) +
            " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
            " is out of range:" + FmtToStr(offsets_of_fusion.size());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()];
      tensor_desc_size = nopadding_size;
    } else {
      if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()),
                        continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) {
        return FAILED;
      }
    }
    bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion;
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) {
      std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) +
          " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
          " is out of range:" + FmtToStr(output_list.size());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    // When continuous input has already been allocated, the first input marks the beginning offset.
    bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
    if (is_allocated_first_input) {
      std::map<int32_t, int32_t> out2ins;
      GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "[Get][AllRef]fail for node: %s", node->GetName().c_str());
      // The output marks the beginning offset, so set the offset for the input; only this case is supported now.
      if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
        auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
        output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
        peer_op_desc->SetOutputOffset(output_list);
        GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld",
               node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second,
               peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset);
      } else {
        GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
               out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
      }
      // The first input marks the beginning offset.
      mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
      continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
    } else {
      // Set the offset for the input.
      output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset;
      peer_op_desc->SetOutputOffset(output_list);
    }
    int64_t align_size = tensor_desc_size;
    if (is_nopadding) {
      mem_offset += nopadding_size;
      extra_memory_size += (tensor_desc_size - nopadding_size);
      real_size = nopadding_size;
    } else {
      ge::AlignMemOffset(align_size);
      mem_offset += align_size;
      // The head and tail of hcom continuous input must each be padded with MEM_ALIGN_SIZE (512) bytes.
      extra_memory_size = MEM_ALIGN_SIZE;
      real_size = tensor_desc_size;
    }
    GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
           "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(),
           peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
           output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
           is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
  }
  mem_offset += extra_memory_size;
  ge::AlignMemOffset(mem_offset);
  continuous_mem_size = mem_offset - continuous_mem_start;
  if (is_continuous_input_allocated) {
    // Memory is not actually allocated here, so there is no need to add the 512-byte head pad.
    iter->second.mem_offset_ -= MEM_ALIGN_SIZE;
  } else {
    iter->second.mem_offset_ = mem_offset;
  }
  return SUCCESS;
}
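// Layout produced by AssignContinuousInputMemory when memory is allocated here (a sketch,
// assuming MEM_ALIGN_SIZE == 512 as the comments above suggest):
//   [512-byte head pad][input 0, aligned][input 1, aligned]...[extra/tail pad]
// continuous_mem_start points at the first input and continuous_mem_size covers everything after
// the head pad; when ATTR_NAME_CONTINUOUS_INPUT_ALLOC is set, nothing is allocated and the head
// pad is rolled back at the end.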
Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  if (in_data_anchor_list.empty()) {
    REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expected when GetFirstInputPeerOutOutputOffset",
                       node->GetName().c_str());
    GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str());
    return FAILED;
  }
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr,
                  REPORT_INNER_ERROR("E19999", "PeerAnchor is null, "
                                     "not expected when GetFirstInputPeerOutOutputOffset for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_IF_BOOL_EXEC(peer_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, "
                                     "not expected when GetFirstInputPeerOutOutputOffset for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, "
                       "judged invalid when GetFirstInputPeerOutOutputOffset for node:%s",
                       peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
    GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s",
           peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
    return FAILED;
  }
  mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type,
                                                         uint32_t continuous_type) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "OpDesc is null, "
                                     "not expected when AssignContinuousOutputMemory for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s, "
                       "when AssignContinuousOutputMemory",
                       out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
    GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
           out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
    return ge::FAILED;
  }
  int64_t mem_offset = 0;
  bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    // The output tensor memory must reuse the input tensor memory.
    if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) {
      return ge::FAILED;
    }
  } else {
    // Get the reference type of the node; the default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref stays false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    // If the output is of ref type and refers to the ref of an input, the output and the input share
    // the same name. When GE encounters a ref type, it finds the matching relationship by the names
    // of the input and the output and allocates the same memory address, e.g. HCOMBroadcast.
    if (is_ref) {
      GELOGI("Current node %s does not need continuous output assignment because it references input by name.",
             node->GetName().c_str());
      return SUCCESS;
    }
    mem_offset = output_list[0];
  }
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type,
                      tensor_desc_size, nopadding_size) != ge::SUCCESS) {
      return FAILED;
    }
    if (is_nopadding) {
      mem_offset += nopadding_size;
    } else {
      mem_offset += tensor_desc_size;
      ge::AlignMemOffset(mem_offset);
    }
    GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
           " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
           out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
           output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
           is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  // key: dynamic batch, batch name
  map<string, map<NodePtr, vector<NodePtr>>> normal_atomic_and_clean_nodes_map;
  map<string, vector<NodePtr>> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return status;
  }
  auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when ReAssignAtomicMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  int64_t batch_atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  int64_t batch_max_mem_offset = batch_atomic_mem_start;
  for (auto &iter_batch : normal_atomic_and_clean_nodes_map) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    for (auto &iter : iter_batch.second) {
      int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
      GELOGD("Begin to reassign atomic memory, atomic address memory start = %ld", atomic_mem_start);
      for (auto &atomic_node : iter.second) {
        vector<int64_t> mem_offset_end;
        status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
        if (status != SUCCESS) {
          GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
                 atomic_node->GetName().c_str());
          return status;
        }
      }
      int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
      if (atomic_mem_size != 0) {
        GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM),
                          "[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str());
      }
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  batch_atomic_mem_start = batch_max_mem_offset;
  for (auto &iter_batch : connecting_output_atomic_nodes) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed, "
             "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
      return FAILED;
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  return SUCCESS;
}
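// Note on ReAssignAtomicMemory: every dynamic-batch label restarts from the same base offset and
// only the maximum offset across batches is kept, so the batch branches overlay the same atomic
// memory region, presumably because at most one batch branch runs at a time.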
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
    map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
    map<string, vector<NodePtr>> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      map<string, vector<NodePtr>> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fails, is_atomic_node stays false.
            (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fails, is_reference stays false.
              (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, "
                                   "not supported now", peer_in_node_desc->GetName().c_str());
                GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, "
                       "not supported now", peer_in_node_desc->GetName().c_str());
                return ge::PARAM_INVALID;
              }
              std::string batch_label;
              (void)ge::AttrUtils::GetStr(peer_in_node_desc, ATTR_NAME_BATCH_LABEL, batch_label);
              vector<int> is_connecting_output;
              // If GetListInt fails, is_connecting_output stays empty.
              (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes[batch_label].emplace_back(peer_in_node);
                continue;
              }
              connecting_output_atomic_nodes[batch_label].emplace_back(peer_in_node);
              tmp_normal_atomic_nodes[batch_label].clear();
              break;
            }
          }
        }
      }
      for (auto &it_atomic_node : tmp_normal_atomic_nodes) {
        if (!it_atomic_node.second.empty()) {
          normal_atomic_nodes_map[it_atomic_node.first][node] = it_atomic_node.second;
        }
      }
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(node_op_desc);
  // Assign atomic node output memory.
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Assign][Memory:Output:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str());
    return ret;
  }
  // Check and assign atomic node workspace memory.
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fails, is_fusion_node stays false.
    (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory.
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign a single ordinary atomic node's workspace memory; fusion nodes are excluded.
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str());
      return ret;
    }
  } else {
    GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignConnectNetOutputAtomicMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }
    // Atomic memory start address.
    int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
             node->GetName().c_str());
      return FAILED;
    }
    // All these atomic nodes use the atomic_addr_clean op independently, so the attr must be set separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) {
      GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node; the default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref stays false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }
    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());
    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judged invalid in node:%s "
                         "when AssignReferenceMemory",
                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
             out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      return ge::FAILED;
    }
    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }
    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      REPORT_INNER_ERROR("E19999", "node(type:%s, name:%s) links to atomic node(name:%s), "
                         "this situation is not supported now",
                         peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][Link]node(type:%s, name:%s) links to atomic node(name:%s), "
             "this situation is not supported now",
             peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
      return false;
    }
  }
  return true;
}

Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> atomic_output_index;
  // If GetListInt fails, atomic_output_index is empty.
  (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    std::string error =
        "Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) +
        " of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignAtomicOutputMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      std::string error =
          "Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) +
          " is more than the size:" + FmtToStr(output_list_size) + " of output_list.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here.
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld",
             node->GetName().c_str(), output_index);
      return ge::FAILED;
    }
    // If an atomic address has already been assigned, skip it; it does not need to be reassigned.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }
    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
    }
    output_list[output_index] = iter->second.mem_offset_;
    std::string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%u] "
           "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(),
           node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM,
           size, size, batch_label.c_str());
    iter->second.mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
    mem_offset_end.emplace_back(iter->second.mem_offset_);
  }
  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
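
// Worked example with hypothetical values: mem_offset_ = 1024, two atomic outputs of sizes 400
// and 96, and MEM_ALIGN_SIZE assumed to be 512 for illustration:
//   output A: offset = 1024, mem_offset_ -> 1424, aligned -> 1536, mem_offset_end = {1536}
//   output B: offset = 1536, mem_offset_ -> 1632, aligned -> 2048, mem_offset_end = {1536, 2048}
// mem_offset_end therefore records the aligned end of each atomic block, which
// SetIndependentAtomicAttr later turns into {start, size} pairs for the clean node.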

Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    std::string error =
        "Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) +
        " is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors.";
    GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    /// Get the input atomic attr of the peer node: if atomic_input_index[0] == -1, the atomic
    /// address has already been assigned.
    vector<int64_t> atomic_input_index;
    (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}
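
// Convention used above (sketch): a consumer whose ATOMIC_ATTR_INPUT_INDEX list begins with
// kAllInputAddrIsAtomic (-1) declares that all of its input addresses are atomic. In that case
// the producer's output already received atomic memory during continuous-memory assignment, so
// is_mem_assigned is set and AssignAtomicOutputMemory skips that output.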

Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignOrdinaryAtomicWorkspaceMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
          " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }
    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        std::string error = "The workspace index:" + FmtToStr(workspace_index) +
            " is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" +
            op_desc->GetName().c_str();
        GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
        return ge::PARAM_INVALID;
      }
      workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%u] size[%ld] real_size[%ld] batch[%s].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size,
          batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
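
// Worked example with hypothetical values: workspace_info = {"NodeA": {{0, 512}, {2, 256}}} and
// mem_offset_ = 4096 rewrites workspace_vector as
//   workspace_vector[0] = 4096  (mem_offset_ -> 4608, mem_offset_end gains 4608)
//   workspace_vector[2] = 4608  (mem_offset_ -> 4864, mem_offset_end gains 4864)
// Workspace indices not listed in workspace_info keep their previously assigned offsets.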

Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], "
                       "not expected when AssignFusionAtomicWorkspaceMemory, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }
    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      size_t workspace_offset = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%u] size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(),
          op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_,
          op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s when AssignFusionAtomicWorkspaceMemory",
                       EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
    GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.",
           EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
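
// Design note: unlike the ordinary path, the fusion path does not write offsets back into the
// op's workspace vector. It records them per fused sub-node in EXT_ATTR_ATOMIC_WORKSPACE_OFFSET,
// e.g. (hypothetical names and numbers)
//   {"SubNode0": {{0, 8192}}, "SubNode1": {{0, 8448}}}
// so each sub-node of the fusion op can later recover its own workspace offsets.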

Status GraphMemoryAssigner::CheckOffset() {
  std::map<std::string, std::string> anchor_to_symbol;
  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
    bool need_update_output = false;
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (uint32_t i = 0; i < output_list.size(); ++i) {
      if (output_list[i] == ge::kInvalidOffset) {
        std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
          output_list[i] = symbol_offset;
          need_update_output = true;
        }
      }
    }
    if (need_update_output) {
      node->GetOpDesc()->SetOutputOffset(output_list);
    }
    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
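
// Sketch of the IDENTITY/READVARIABLEOP fix-up above (hypothetical anchors): with
//   anchor_to_symbol  = {"var:0" -> "s0", "identity:0" -> "s0"}
//   symbol_to_anchors = {"s0" -> [var:0, identity:0]}
// GetSymbolOutputOffset resolves the offset bound to symbol "s0"; if it differs from
// output_list[i], the identity's output offset is rewritten so all aliases of "s0" agree.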

ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when SetInputOffset, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  }
  for (auto pair : memory_offset_) {
    GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
            pair.second.mem_offset_, pair.first);
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str());
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}

NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }
  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }
  return NodeUtils::GetParentInput(node);
}

ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }
  // Subgraph Data node: check for a constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set the Data output the same as the const output.
    return SUCCESS;  // Constant input.
  }
  // Memory is allocated separately for dynamic shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }
  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    std::string error = "Get parent input offset failed, node is " + FmtToStr(node->GetName()) +
        ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
        FmtToStr(parent_index);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set the Data output the same as the parent input.
  return SUCCESS;
}

ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }
    // If the current node is not a broadcast node, the OutputOffset of the peer node is used to update input_list.
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr && !origin_input_list.empty()) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          std::string error = "fusion: node" + FmtToStr(tmp_op_desc->GetName()) +
              " input_size" + FmtToStr(input_size) + " differs from memory_type_size" +
              FmtToStr(mem_type_size) + " or from ori_input_offset_list_size" +
              FmtToStr(ori_input_offset_list_size);
          GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
          return ge::FAILED;
        }
        // Non-HBM (e.g. L1) memory keeps the original input offset;
        // for HBM, input offset = original input offset + peer output offset.
        input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? origin_input_list[valid_input_index]
                            : origin_input_list[valid_input_index] + output_list.at(out_index));
      }
      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }
      GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]",
             has_mem_type_attr ? "Fusion" : "",
             tmp_op_desc->GetName().c_str(),
             valid_input_index,
             peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(),
             out_index,
             input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}
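
// Worked example for the fusion branch with hypothetical values:
//   memory_type         = {RT_MEMORY_L1, RT_MEMORY_HBM}
//   origin_input_list   = {64, 128}
//   peer output offsets = {1000, 2000}
//   input 0 (L1):  offset stays 64 (original kept)
//   input 1 (HBM): offset = 128 + 2000 = 2128 (original offset added to the peer's output offset)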

ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }
      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is a broadcast node and its precursor is a variable, the broadcast node's own
      // InputOffset (already set in AssignVarAttr2Nodes) is used to update input_list.
      // Otherwise, the OutputOffset of the precursor node is used.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s ", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}

Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end, int64_t memory_type) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  // Parse the offset and size vectors.
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    // memory_offset_start[i + 1] is the end of the i-th block, so the block size is end minus start.
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();
  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }
      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) {
          GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
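
// Worked example with hypothetical values: atomic_mem_start = 100, mem_offset_end = {200, 350}:
//   memory_offset_start = {100, 200}  (after the trailing pop_back)
//   memory_offset_size  = {100, 150}
// Each entry of mem_offset_end closes the block opened by the previous start, and the resulting
// {start, size} pairs are handed to the ATOMICADDRCLEAN node found through the in-control edges.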

ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size, int64_t memory_type) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
    std::vector<int64_t> mem_start_vector;
    // If GetListInt fails, mem_start_vector is empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);
    std::vector<int64_t> mem_size_vector;
    // If GetListInt fails, mem_size_vector is empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);
    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();
    GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] stream_id[%ld]"
           " memtype[%ld] size[%s]", node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type,
           atomic_mem_size_str.c_str());
  }
  return SUCCESS;
}
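
// Usage note (sketch with hypothetical values): the clean node accumulates regions across calls.
// With an existing ATTR_NAME_AUTOMIC_ADD_START = {0} and ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = {512},
// a call with atomic_mem_start = {1024} and atomic_mem_size = {256} leaves {0, 1024} and
// {512, 256}: the full list of regions the AtomicAddrClean op must zero out.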

void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  if (mem_align_size <= 0) {
    return;
  }
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    GELOGW("Memory offset map does not have memory type[%ld].", memory_type);
    return;
  }
  iter->second.mem_offset_ =
      (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}
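
// Worked example: the expression is the usual round-up (x + a - 1) / a * a. Assuming
// mem_align_size = 512 for illustration:
//   mem_offset_ = 1000 -> (1000 + 511) / 512 * 512 = 1024
//   mem_offset_ = 1024 -> 1024 (already aligned, unchanged)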

ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
                                                      int64_t &memory_type) {
  memory_type = RT_MEMORY_HBM;
  // In the dynamic batch scenario, the memory attributes of all nodes are the same.
  for (auto &n : nodes) {
    if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"),
                        "[Get][MemType:input]fail for node:%s", n->GetName().c_str())
      break;
    }
    if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"),
                        "[Get][MemType:output]fail for node:%s", n->GetName().c_str())
      break;
    }
  }
  return SUCCESS;
}

ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  memory_type = RT_MEMORY_HBM;
  vector<int64_t> mem_type_list;
  if (input_or_output == "input") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (input_or_output == "output") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (mem_type_list.empty()) {
    if (memory_offset_.find(memory_type) == memory_offset_.end()) {
      std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
          ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    return SUCCESS;
  }
  if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
    std::string error = "The size" + FmtToStr(mem_type_list.size()) +
        " of the mem type list is not equal to the size" +
        FmtToStr(node->GetAllInDataAnchorsSize()) + " of in data anchors, opname is " +
        FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  if (!CheckContinuousMemType(mem_type_list)) {
    GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str());
    return FAILED;
  }
  // The memory is continuous and all types are the same, so use the first one.
  memory_type = mem_type_list[0];
  return SUCCESS;
}

bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  if (mem_type_list.empty()) {
    return true;
  }
  int64_t mem_type_tmp = mem_type_list[0];
  for (auto mem_type : mem_type_list) {
    if (mem_type != mem_type_tmp) {
      std::string error = "The memory is continuous, but the type of the input memory is inconsistent. They are " +
          FmtToStr(mem_type_tmp) + " and " + FmtToStr(mem_type);
      ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
      GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].",
             mem_type_tmp, mem_type);
      return false;
    }
  }
  if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(mem_type_tmp);
    ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
    GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
    return false;
  }
  return true;
}
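
// Example: {RT_MEMORY_HBM, RT_MEMORY_HBM} passes (a single type that exists in memory_offset_),
// while {RT_MEMORY_HBM, RT_MEMORY_L1} fails, because inputs placed in one continuous block must
// all live in the same kind of memory.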

void GraphMemoryAssigner::PrintMemoryOffset() {
  for (auto pair : memory_offset_) {
    // Print the assigned offset for every memory type.
    GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
           pair.first, pair.second.mem_offset_);
  }
}

ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int32_t> &out2ins) {
  for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    int32_t reuse_in_index = -1;
    bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index);
    if (reuse_input_flag) {
      if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
        out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
      } else {
        REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, "
                           "please check attr reuse_input",
                           reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, "
               "please check attr reuse_input",
               reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        return FAILED;
      }
    }
  }
  return ge::SUCCESS;
}
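
// Sketch: for a node whose output 0 carries the reuse_input attribute pointing at input 1,
// GetAllRef fills out2ins = {{0, 1}}, i.e. output index -> reused input index, which callers use
// to alias the output's memory onto the input's.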

bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
    const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
  for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
    if (in_node->GetType() == VARIABLE) {
      GELOGI("Node %s's precursor node %s is a variable, do not store.", input_continuous_node->GetName().c_str(),
             in_node->GetName().c_str());
      return true;
    }
    auto iter = node_2_continuous_type.find(in_node);
    // The input node comes earlier in topological order, so it must already be in the map.
    auto continuous_type = iter->second;
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      GELOGI("[Store][Node] %s because its precursor node %s needs continuous input memory",
             input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
      return false;
    }
  }
  for (const auto &out_node : input_continuous_node->GetOutDataNodes()) {
    auto continuous_type = GetContinuousMemoryType(out_node->GetOpDesc());
    node_2_continuous_type.emplace(out_node, continuous_type);
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      GELOGI("[Store][Node] %s because its successor node %s needs continuous input memory",
             input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
      return false;
    }
  }
  return true;
}
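
// Decision sketch for the "directly" check above:
//   a precursor is VARIABLE                    -> process now (return true)
//   a precursor needs continuous input memory  -> defer this node (return false)
//   a successor needs continuous input memory  -> defer this node (return false)
//   otherwise                                  -> process now (return true)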

ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
                                                                             uint32_t continuous_type,
                                                                             bool reverse_refresh) {
  int64_t mem_clean_start = 0;
  int64_t mem_clean_size = 0;
  int64_t memory_type = RT_MEMORY_HBM;
  GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"),
                    "[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str());
  auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
                                         continuous_type, reverse_refresh);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str());
    return ret;
  }
  // Clean up the atomic address, e.g. for hcom nodes.
  vector<int32_t> input_indexes;
  // If GetListInt fails, input_indexes is empty.
  (void)ge::AttrUtils::GetListInt(input_continuous_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
  if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
    // Check whether there is an atomic conflict between the current node and the peer out node.
    if (!CheckInputIsSupportAtomic(input_continuous_node)) {
      return ge::FAILED;
    }
    const auto &in_control_anchor = input_continuous_node->GetInControlAnchor();
    GE_CHECK_NOTNULL(in_control_anchor);
    for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      GE_CHECK_NOTNULL(peer_out_control_anchor);
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
        ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
        if (ret != SUCCESS) {
          GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
          return ret;
        }
      }
    }
  }
  return ge::SUCCESS;
}

Status GraphMemoryAssigner::AssignBufferPoolMemory() {
  auto is_buffer_pool_mem_enable = [] (const ComputeGraphPtr &graph) -> bool {
    for (NodePtr &node : graph->GetAllNodes()) {
      auto op_desc = node->GetOpDesc();
      if (op_desc == nullptr) {
        continue;
      }
      bool has_attrs = op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_ID) && op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_SIZE);
      if (has_attrs) {
        return true;
      }
    }
    return false;
  };
  auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
  GE_CHECK_NOTNULL(root_graph);
  if (root_graph->GetGraphUnknownFlag()) {
    GELOGI("[Check][Enable]Unknown root graph does not support buffer pool memory, graph:%s.",
           compute_graph_->GetName().c_str());
    return SUCCESS;
  }
  if (!is_buffer_pool_mem_enable(compute_graph_)) {
    GELOGD("[Check][Enable]Buffer pool memory is not enabled, graph:%s.", compute_graph_->GetName().c_str());
    return SUCCESS;
  }
  map<int64_t, size_t> mem_type_to_offset;
  for (const auto &pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
  }
  BufferPoolMemAssigner buffer_pool_mem_assigner(compute_graph_, mem_type_to_offset);
  Status status = buffer_pool_mem_assigner.Assign();
  if (status != SUCCESS) {
    GELOGE(status, "[Assign][BufferPoolMem]Graph:%s.", compute_graph_->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "Failed to assign buffer pool memory, graph:%s.", compute_graph_->GetName().c_str());
    return status;
  }
  int64_t mem_type = buffer_pool_mem_assigner.GetMemType();
  auto iter = memory_offset_.find(mem_type);
  if (iter == memory_offset_.end()) {
    GELOGE(FAILED, "[Check][MemType]Memory type is not supported, graph:%s, mem type:%ld.",
           compute_graph_->GetName().c_str(), mem_type);
    REPORT_INNER_ERROR("E19999", "Memory type is not supported, graph:%s, mem type:%ld.",
                       compute_graph_->GetName().c_str(), mem_type);
    return FAILED;
  }
  iter->second.mem_offset_ = buffer_pool_mem_assigner.GetMemOffset();
  GELOGI("[Assign][BufferPoolMem]Assign buffer pool memory successfully, graph:%s, mem type:%ld, mem offset:%zu.",
         compute_graph_->GetName().c_str(), mem_type, buffer_pool_mem_assigner.GetMemOffset());
  return SUCCESS;
}

}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the front-end module ME and the underlying hardware and acts as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph optimizations on it, and finally outputs a graph that can run efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts: GE API and GE Core.