
graph_mem_assigner.cc 71 kB

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "graph/build/memory/graph_mem_assigner.h"
#include <cstring>
#include <set>
#include "common/math/math_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"

namespace {
const int kDataOutputIndex = 0;
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
const size_t kVirtualInputNodeOutputSize = 1;
const size_t kVirtualOutputNodeInputSize = 1;
const size_t kVirtualNodeDataIndex = 0;
const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_";

int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
                              const ge::NodePtr &node, const uint32_t i) {
  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  if (iter1 == anchor_to_symbol.end()) {
    return ge::kInvalidOffset;
  }
  auto out_symbol = iter1->second;
  auto iter2 = symbol_to_anchors.find(out_symbol);
  if (iter2 == symbol_to_anchors.end()) {
    return ge::kInvalidOffset;
  }
  for (const auto &node_index_io : iter2->second) {
    if (node_index_io.value_ == out_symbol) {
      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
      if (node_index_io.index_ >= symbol_output_list.size()) {
        return ge::kInvalidOffset;
      }
      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
      return symbol_output_list.at(node_index_io.index_);
    }
  }
  return ge::kInvalidOffset;
}
}  // namespace

namespace ge {
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }

  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}

Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
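
// AssignMemory below runs in two phases: HybridMemAssigner first lays out
// feature-map memory, and its final offset becomes the HBM base recorded in
// memory_offset_; VariableMemoryAssigner then lays out constant and variable
// memory, whose size VarManager tracks per session.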

Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_));
  if (mem_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc HybridMemAssigner failed.");
    return ge::FAILED;
  }
  if (mem_assigner->Assign() != ge::SUCCESS) {
    GELOGE(ge::FAILED, "Memory assigner failed");
    return ge::FAILED;
  }
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.push_back(memory_offset);

  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGI("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);

  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}

ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}

ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                                                  int64_t dim_index, int64_t &output_mem_size,
                                                                  int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Opdesc GetSize failed!");
    return FAILED;
  }

  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    GELOGE(FAILED, "Invalid value(%ld) of attr _reuse_input_on_dim_index, which is out of data range [0, %zu).",
           dim_index, output_dims.size());
    return FAILED;
  }

  // Collapse all dims before dim_index into batch_dim_num; the remaining shape describes one batch's data.
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }

  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();

  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
    return FAILED;
  }

  if (output_mem_size < 0) {
    GELOGE(FAILED, "After calculating tensor memory size, output_mem_size = %ld, out of data range [0, %ld]",
           output_mem_size, INT64_MAX);
    return FAILED;
  }
  return SUCCESS;
}
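
// For multi-batch virtual nodes, GetMaxBatchLabel below scans each group of
// batch copies and keeps the batch label of the copy whose reuse dimension is
// largest. Exactly one dimension may differ across batches; a mismatch in any
// other dimension is rejected as an error.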

Status GraphMemoryAssigner::GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                                             int32_t mem_reuse_model, string &max_batch_label) {
  for (auto &i_map : mem_reuse_virtual_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    vector<int64_t> max_shape_dims;
    size_t max_batch_dim = 0;
    bool max_batch_dim_find = false;
    for (size_t i = 0; i < virtual_nodes_list.size(); ++i) {
      GE_CHECK_NOTNULL(virtual_nodes_list[i]);
      OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);

      ge::ConstGeTensorDescPtr input_output_desc;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      GE_CHECK_NOTNULL(input_output_desc);

      if (i == 0) {
        // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
        (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
        max_shape_dims = input_output_desc->GetShape().GetDims();
      } else {
        vector<int64_t> current_shape_dims = input_output_desc->GetShape().GetDims();
        if (current_shape_dims.size() != max_shape_dims.size()) {
          GELOGE(FAILED, "The shape size of several nodes between multiple batches does not match.");
          return FAILED;
        }
        for (size_t j = 0; j < current_shape_dims.size(); ++j) {
          if (current_shape_dims[j] == max_shape_dims[j]) {
            continue;
          }
          if (max_batch_dim_find && max_batch_dim != j) {
            GELOGE(FAILED, "The shape of several nodes between multiple batches does not match.");
            return FAILED;
          }
          max_batch_dim_find = true;
          max_batch_dim = j;
          if (current_shape_dims[j] > max_shape_dims[j]) {
            max_shape_dims[j] = current_shape_dims[j];
            // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
            (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
          }
          // Only compare the first different dim in shape.
          break;
        }
      }
    }
    // In every element of virtual_input_nodes_map, the label of the max batch node is the same.
    break;
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offset) {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return ge::FAILED;
  }

  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousInputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousInputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousOutputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousOutputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");

  mem_offset = memory_offset_[0].mem_offset_;
  auto session_id = compute_graph_->GetSessionID();
  if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset,
           VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
    ErrorManager::GetInstance().ATCReportErrMessage(
        "E19022", {"size", "item", "maxsize"},
        {std::to_string(mem_offset), "featuremap",
         std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
    return ge::FAILED;
  }
  return SUCCESS;
}
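
// Zero-copy blocks are the regions exchanged directly with the caller. They are
// appended after the feature-map region laid out above, and the difference
// between the offsets before and after this pass is reported back as
// zero_mem_copy_size.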

Status GraphMemoryAssigner::AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);

  size_t mem_offset_tmp = mem_offset;

  // set offset for zero copy block
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset);
    mem_offset += memory_block->Size();
    memory_block->SetTailOffset(mem_offset - 1);
  }
  GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset);

  // set offset for zero copy nodes
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset - mem_offset_tmp;
  memory_offset_[0].mem_offset_ = mem_offset;

  GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset, mem_offset_tmp, zero_mem_copy_size);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  GELOGI("Begin to reassign continuous memory");
  Status ret;
  for (auto &node : compute_graph_->GetAllNodes()) {
    // Get the continuous input type of the node, default is false.
    bool is_input_continuous = false;
    GE_CHECK_NOTNULL(node->GetOpDesc());
    // If GetBool fails, is_input_continuous is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);

    // Assign continuous input memory.
    if (is_input_continuous) {
      int64_t mem_clean_start = 0;
      int64_t mem_clean_size = 0;
      ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous input memory failed!");
        return ret;
      }

      // Clean up atomic address, e.g. for hcom nodes.
      vector<int32_t> input_indexes;
      // If GetListInt fails, input_indexes is empty.
      (void)ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
      if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
        // Check whether there is an atomic conflict between the current node and the peer out node.
        if (!CheckInputIsSupportAtomic(node)) {
          GELOGE(ge::FAILED,
                 "There is an atomic conflict between the current node and the peer out node, not supported!");
          return ge::FAILED;
        }
        const auto &in_control_anchor = node->GetInControlAnchor();
        GE_CHECK_NOTNULL(in_control_anchor);
        for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
          auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
          if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
            ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size});
            if (ret != SUCCESS) {
              GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
              return ret;
            }
          }
        }
      }
    }

    // Get the reference type of the node, default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);

    // Get the continuous output type of the node, default is false.
    bool is_output_continuous = false;
    // If GetBool fails, is_output_continuous is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);

    // If the output is a ref type and refers to the ref of an input, the output and the input share the same name.
    // When GE encounters a ref type, it finds the matching relationship according to the names of the input and
    // output, and allocates the same memory address, e.g. HCOMBroadcast.
    if (!is_ref && is_output_continuous) {  // Assign continuous output memory.
      ret = AssignContinuousOutputMemory(node);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign reference memory failed!");
        return ret;
      }
    }
  }

  GELOGI("After reassign continuous memory, memoffset = %zu.", memory_offset_[0].mem_offset_);
  return ge::SUCCESS;
}
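
// The per-input layout below advances memory_offset_[0].mem_offset_ by each
// peer output's tensor size and then rounds the offset up with
// (size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; for example,
// if MEM_ALIGN_SIZE were 512, a 600-byte tensor would advance the aligned
// offset by 1024 bytes.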

Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                                        int64_t &continuous_mem_size) {
  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
  continuous_mem_start = memory_offset_[0].mem_offset_;
  bool continuous_input_alloc = false;
  (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);

    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    bool is_peer_output_continuous = false;
    // If GetBool fails, is_peer_output_continuous is false.
    (void)ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);

    // Get the peer node's output size. If size == 1 (the peer node has only one output), the continuous input of
    // this node and the continuous output of the previous node are the same, and we can support it. If size != 1,
    // the two may conflict, and we cannot support it.
    auto peer_output_size = peer_op_desc->GetOutputsSize();
    GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "continuous output. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);

    bool is_peer_reference = false;
    // If GetBool fails, is_peer_reference is false.
    (void)AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
    GE_IF_BOOL_EXEC(is_peer_reference,
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "reference. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);

    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    std::vector<int64_t> offsets_for_fusion = {};
    bool has_offset_attr =
        AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
    if (peer_out_data_anchor->GetIdx() < static_cast<int>(output_list.size())) {
      if (continuous_input_alloc && !has_offset_attr) {
        if (in_data_anchor->GetIdx() == 0) {
          continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
        }
        // Cannot use "else if" here, in case the node has only one input.
        if (in_data_anchor->GetIdx() == static_cast<int>(node->GetAllInDataAnchors().size()) - 1) {
          int64_t tensor_desc_size = 0;
          Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())),
                                                tensor_desc_size);
          GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);

          tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
          continuous_mem_size =
              output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE;
        }
        GELOGI(
            "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
            "real_size[%u].",
            node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
            peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()),
            peer_op_desc->GetStreamId(), 0, 0);
        continue;
      }
      output_list.at(peer_out_data_anchor->GetIdx()) = memory_offset_[0].mem_offset_;
    } else {
      GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    peer_op_desc->SetOutputOffset(output_list);
    size_t pre_mem_offset = memory_offset_[0].mem_offset_;

    int64_t tensor_desc_size = 0;
    if (has_offset_attr) {
      if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_for_fusion.size())) {
        auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()];
        memory_offset_[0].mem_offset_ += offset_for_fusion;
      } else {
        GELOGE(FAILED, "fusion: peer node %s index : %d is out of range.", peer_op_desc->GetName().c_str(),
               peer_out_data_anchor->GetIdx());
        return FAILED;
      }
    } else {
      Status ret =
          TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size);
      GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);

      memory_offset_[0].mem_offset_ += tensor_desc_size;
    }

    // If tensor_actual_size is set, memory alignment is not required.
    int32_t is_tensor_actual_size = 0;
    ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size);
    if (is_tensor_actual_size == 0) {
      AlignMemOffset(MEM_ALIGN_SIZE);
    }
    GELOGI(
        "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
        peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(),
        (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size);
  }

  memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE;
  if (!continuous_input_alloc) {
    continuous_mem_size = memory_offset_[0].mem_offset_ - continuous_mem_start;
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
           out_op_desc->GetOutputsSize(), output_list.size());
    return ge::FAILED;
  }

  size_t mem_offset = output_list[0];
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) !=
        ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
    mem_offset += tensor_desc_size;
    if (mem_offset <= 0) {
      return FAILED;
    }
    mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
    GELOGI(
        "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
        output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
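
// A "virtual" input node (no-padding continuous input whose output reuses its
// inputs) writes one running offset both into its own output and into every
// peer output feeding it, so the concatenated inputs alias a single buffer.
// The real per-batch size advances the offset, while extra_memory_size keeps
// the full out_size reserved at the end.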

Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  output_list.at(0) = mem_offset_reuse;
  op_desc->SetOutputOffset(output_list);
  GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);

  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }

  size_t extra_memory_size = 0;
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_data_anchor);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_CHECK_NOTNULL(peer_op_desc);
    vector<int64_t> output_offsets = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_offsets.size())) {
      GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return ge::FAILED;
    }
    output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
    peer_op_desc->SetOutputOffset(output_offsets);
    size_t pre_mem_offset = mem_offset_reuse;

    // Calculate the tensor real size of each piece of data and the out size of the complete data.
    ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx());
      return FAILED;
    }

    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;
    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
           "real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
           peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, output_mem_size);
  }

  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_input_nodes_map;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);

    if (attr_reuse && attr_continuous) {
      if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) {
        // When the current virtual node has several outputs, we cannot directly determine which one reuses the
        // input tensor's memory.
        GELOGE(FAILED, "Only one output is supported, current virtual node %s has %zu outputs.", n->GetName().c_str(),
               op_desc->GetOutputsSize());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual input node, memory offset = %zu.", memory_offset_[0].mem_offset_);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL; no need to check the return value, only the out parameter.
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = memory_offset_[0].mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }

        memory_offset_[0].mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE);
        GELOGD("After reassign memory for virtual input node, align memory = %zu.", memory_offset_[0].mem_offset_);
      } else {
        // Has ATTR_NAME_BATCH_LABEL: a dynamic multi-batch node, which needs to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.",
                 kMbatchNodeNameFlag, n->GetName().c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_input_nodes;
        if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name];
        }
        parallel_virtual_input_nodes.emplace_back(n);
        mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes;
      }
    }
  }
  int32_t mem_reuse_model = 0;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual input nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();

  // 1. Set memory of the input tensor to be reused.
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_data_anchor);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_CHECK_NOTNULL(peer_op_desc);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  peer_op_desc->SetOutputOffset(in_node_output_offsets);
  GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);

  // 2. Set memory of the output tensors.
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  if (op_desc->GetOutputsSize() > output_list.size()) {
    GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(),
           output_list.size());
    return FAILED;
  }
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }

  size_t extra_memory_size = 0;
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset_reuse;
    size_t pre_mem_offset = mem_offset_reuse;

    // Calculate the tensor real size of each piece of data and the out size of the complete data.
    ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             op_desc->GetName().c_str(), out_data_anchor->GetIdx());
      return FAILED;
    }

    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;
    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           pre_mem_offset, out_size, output_mem_size);
  }
  op_desc->SetOutputOffset(output_list);
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_output_nodes_map;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);

    if (attr_reuse && attr_continuous) {
      auto in_data_anchor_list = n->GetAllInDataAnchors();
      if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) {
        // When the current virtual node has several inputs, we cannot directly determine which input is the
        // tensor to be reused.
        GELOGE(FAILED, "Only one input is supported, current virtual node %s has %zu inputs.", n->GetName().c_str(),
               in_data_anchor_list.size());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual output node, memory offset = %zu.", memory_offset_[0].mem_offset_);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL; no need to check the return value, only the out parameter.
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = memory_offset_[0].mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }

        memory_offset_[0].mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE);
        GELOGD("After reassign memory for virtual output node, align memory = %zu.", memory_offset_[0].mem_offset_);
      } else {
        // Has ATTR_NAME_BATCH_LABEL: a dynamic multi-batch node, which needs to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual output node, node name: %s.",
                 kMbatchNodeNameFlag, n->GetName().c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_output_nodes;
        if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name];
        }
        parallel_virtual_output_nodes.emplace_back(n);
        mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes;
      }
    }
  }
  int32_t mem_reuse_model = 1;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual output nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}
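
// ReAssignVirtualNodesMemory below makes two passes over each fixed-name group
// of multi-batch virtual nodes: the first pass assigns fresh offsets only to
// the copies carrying the max batch label and records each group's start
// offset; the second pass replays the remaining smaller-batch copies from that
// same start offset, so every batch of one logical node aliases the max
// batch's buffer.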

Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map,
                                                       int32_t mem_reuse_model) {
  // Find max batch label value
  string max_batch_label;
  if (GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label) != SUCCESS) {
    GELOGE(FAILED, "Get max batch label failed.");
    return FAILED;
  }
  GELOGI("The batch label of max batch virtual nodes is %s.", max_batch_label.c_str());

  // Assign memory of max batch nodes that have the same batch label.
  GELOGD("Start to reassign memory for max batch virtual nodes, memory offset = %zu.", memory_offset_[0].mem_offset_);
  vector<size_t> nodes_mem_offset_list;
  for (auto &i_map : mem_reuse_nodes_map) {
    size_t max_batch_node_mem_offset = memory_offset_[0].mem_offset_;
    nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset);
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      // Op_desc is not nullptr, it has been checked.
      OpDescPtr op_desc = i_node->GetOpDesc();
      string batch_label_string;
      // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string == max_batch_label) {
        Status status = SUCCESS;
        if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
          status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset);
        } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
          status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset);
        } else {
          GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
          return FAILED;
        }
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
          return FAILED;
        }

        memory_offset_[0].mem_offset_ = max_batch_node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE);
        GELOGD("After reassign memory for virtual node, align memory = %zu.", memory_offset_[0].mem_offset_);
        // Only assign memory of max batch nodes.
        break;
      }
    }
  }

  // Assign memory of remaining nodes that have the same fixed_name.
  GELOGD("Start to reassign memory for remaining batch virtual nodes, memory offset = %zu.",
         memory_offset_[0].mem_offset_);
  size_t memory_reuse_index = 0;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index];
      Status status = SUCCESS;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      if (status != SUCCESS) {
        GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
        return FAILED;
      }
    }
    memory_reuse_index++;
  }
  return SUCCESS;
}
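
// Atomic outputs and workspaces are grouped into a contiguous region that an
// ATOMICADDRCLEAN op zeroes before execution: nodes hanging off a shared clean
// op are sized together via SetAtomicCleanAttr, while nodes connecting
// directly to the net output get an independent clean attr below.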

Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map;
  vector<NodePtr> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
    return status;
  }

  for (auto &iter : normal_atomic_and_clean_nodes_map) {
    int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
    GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);

    for (auto &atomic_node : iter.second) {
      vector<int64_t> mem_offset_end;
      status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
      if (status != SUCCESS) {
        GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
               atomic_node->GetName().c_str());
        return status;
      }
    }

    int64_t atomic_mem_size = static_cast<int64_t>(memory_offset_[0].mem_offset_) - atomic_mem_start;
    status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size});
    if (status != SUCCESS) {
      GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
      return status;
    }
  }

  if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) {
    GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
    return FAILED;
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
                                                             vector<NodePtr> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      vector<NodePtr> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fails, is_atomic_node is false.
            (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fails, is_reference is false.
              (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.",
                       peer_in_node_desc->GetName().c_str());
                return ge::PARAM_INVALID;
              }

              vector<int> is_connecting_output;
              // If GetListInt fails, is_connecting_output is an empty vector.
              (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes.emplace_back(peer_in_node);
                continue;
              }
              connecting_output_atomic_nodes.emplace_back(peer_in_node);
              tmp_normal_atomic_nodes.clear();
              break;
            }
          }
        }
      }
      if (!tmp_normal_atomic_nodes.empty()) {
        normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes;
      }
    }
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  // Assign atomic node output memory.
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
    return ret;
  }

  // Check and assign atomic node workspace memory.
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fails, is_fusion_node is false.
    (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);

    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory.
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign a single ordinary atomic node's workspace memory; fusion nodes are not included.
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
      return ret;
    }
  } else {
    GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }

    // Atomic memory start addr
    int64_t original_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
      return FAILED;
    }

    // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Failed to set atomic attr separately.");
      return FAILED;
    }
  }
  return SUCCESS;
}
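
// Reference (ref) outputs get no storage of their own: an output whose name
// matches an input name is assigned that input's peer output offset, so the op
// writes through the ref output into the same buffer in place.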

Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node, default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }

    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());

    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
             out_op_desc->GetOutputsSize(), output_list.size());
      return ge::FAILED;
    }

    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }

    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }

    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      GELOGE(ge::FAILED,
             "The current node is %s, and the peer out node is %s. Currently, this scenario is not supported",
             node->GetName().c_str(), peer_op_desc->GetName().c_str());
      return false;
    }
  }
  return true;
}

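// Rationale (inferred, not stated in the original source): outputs of constant,
// variable and AIPP data ops live in persistent memory outside the feature-map region
// that atomic clean zeroes, so letting an atomic op clear such an input in place
// could corrupt data that must survive across executions; those producers therefore
// disqualify the node from atomic input cleaning.
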
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());

  vector<int64_t> atomic_output_index;
  // If GetListInt fails, atomic_output_index stays empty.
  (void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);

  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    GELOGE(ge::FAILED, "The size of atomic_output_index is more than the size of output_list");
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      GELOGE(ge::PARAM_INVALID, "The output index %ld is more than the size %ld of output_list.", output_index,
             output_list_size);
      return ge::PARAM_INVALID;
    }

    // If the input of the cascaded op needs to clear the atomic addr, there is no need to clear it separately here.
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
      return ge::FAILED;
    }

    // If an atomic address has already been assigned, skip it; there is no need to reassign.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }

    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
    }

    output_list[output_index] = memory_offset_[0].mem_offset_;
    GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld].",
           compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, memory_offset_[0].mem_offset_,
           op_desc->GetStreamId(), size, size);

    memory_offset_[0].mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE);
    mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
  }

  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}

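// Illustrative walk-through (hypothetical numbers): with mem_offset_ = 1024, an
// atomic output of size 600 and MEM_ALIGN_SIZE = 512, the output offset becomes 1024,
// mem_offset_ advances to 1624 and is then aligned up to 2048, and 2048 is recorded
// in mem_offset_end as the section boundary consumed later by
// SetIndependentAtomicAttr.
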
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index);
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    /// Get the input atomic attr of the peer op; if atomic_input_index[0] == -1, it indicates that the atomic
    /// address has already been assigned.
    vector<int64_t> atomic_input_index;
    (void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}

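// Note (inferred from the usage above): kAllInputAddrIsAtomic is the -1 sentinel
// stored as atomic_input_index[0]; a consumer carrying it declares that all of its
// input addresses are atomic and were already assigned during continuous-memory
// reassignment, so the producer must not allocate the same output again.
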
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();

  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      GELOGE(ge::PARAM_INVALID, "The node name %s and the node name %s in workspace info are inconsistent.",
             op_desc->GetName().c_str(), iter->first.c_str());
      return ge::PARAM_INVALID;
    }

    if (iter->second.empty()) {
      continue;
    }

    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        GELOGE(ge::PARAM_INVALID, "The workspace index %lu is more than the size %zu of workspace vector.",
               workspace_index, workspace_vector.size());
        return ge::PARAM_INVALID;
      }

      workspace_vector[workspace_index] = memory_offset_[0].mem_offset_;
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "size[%ld] real_size[%ld].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
          memory_offset_[0].mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size);

      memory_offset_[0].mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}

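// Illustrative shape of workspace_info (hypothetical values): the outer key is the
// node name and the inner map goes from workspace index to byte size, e.g.
//   { "conv1": { 0: 4096, 2: 1024 } }
// so only the listed workspace slots of "conv1" are rebased onto atomic memory.
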
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;

  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }

    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;

      size_t workspace_offset = memory_offset_[0].mem_offset_;
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] "
          "real_size[%ld].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
          memory_offset_[0].mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size);

      memory_offset_[0].mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}

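// Note (summary of the two workspace paths above): the ordinary variant writes the
// new offsets straight into the op's workspace vector, while the fusion variant
// leaves the workspace vector untouched and records a per-sub-node {index -> offset}
// map in the EXT_ATTR_ATOMIC_WORKSPACE_OFFSET ext attr, presumably so that fused
// sub-nodes can look up their own offsets by name at a later stage.
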
Status GraphMemoryAssigner::CheckOffset() {
  std::map<std::string, std::string> anchor_to_symbol;
  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        GELOGE(FAILED, "Invalid offset in node: %s input: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
        return FAILED;
      }
    }

    bool need_update_output = false;
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (uint32_t i = 0; i < output_list.size(); ++i) {
      if (output_list[i] == ge::kInvalidOffset) {
        GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
        return FAILED;
      }
      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
          output_list[i] = symbol_offset;
          need_update_output = true;
        }
      }
    }
    if (need_update_output) {
      node->GetOpDesc()->SetOutputOffset(output_list);
    }

    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        GELOGE(FAILED, "Invalid offset in node: %s workspace: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
        return FAILED;
      }
    }
  }
  return SUCCESS;
}

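// Note (inferred summary): besides validating that no input, output or workspace
// offset is still ge::kInvalidOffset, CheckOffset re-synchronizes IDENTITY and
// READVARIABLEOP outputs with the offset of their ref-mapping symbol, since those
// pass-through ops should alias the buffer of the anchor they reference rather than
// own memory of their own.
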
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return FAILED;
  }
  GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu]", compute_graph_->GetName().c_str(),
          memory_offset_[0].mem_offset_);
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "Update op input offset failed");
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}

NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }

  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }

  return NodeUtils::GetParentInput(node);
}

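// Note (summary of the helper above): for a known-shape subgraph Data node that
// carries ATTR_NAME_PARENT_NODE_INDEX, the effective producer is the matching input
// of the parent node, so the parent input is returned; dynamic-shape nodes keep their
// own identity because their memory is allocated inside the subgraph.
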
ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }

  // Subgraph Data Node, check for constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as const output.
    return SUCCESS;  // Constant input.
  }

  // Memory allocated for dynamic shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }

  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    GELOGE(FAILED, "Get Parent input offset failed, node: %s, input size: %zu, parent index: %u",
           node->GetName().c_str(), parent_inputs.size(), parent_index);
    return FAILED;
  }

  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as parent input.
  return SUCCESS;
}

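// Note (summary of the branches above): a subgraph Data node takes its offsets from a
// constant parent input when one exists, keeps whatever dynamic-shape allocation it
// already has, and otherwise inherits parent_inputs[parent_index] so that its output
// aliases the parent node's input buffer.
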
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }

    // If the current node is not a broadcast node, the OutputOffset of the previous node is used to update the
    // input_list.
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          GELOGE(ge::FAILED,
                 "fusion: node[%s] input_size[%zu] differs from memory_type_size[%zu]"
                 " or from ori_input_offset_list_size[%lu]",
                 tmp_op_desc->GetName().c_str(), input_size, mem_type_size, ori_input_offset_list_size);
          return ge::FAILED;
        }
        // Non-HBM (L1) inputs keep the original input offset;
        // HBM inputs use: input offset = original input offset + peer output offset.
        input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1
                            ? origin_input_list[valid_input_index]
                            : origin_input_list[valid_input_index] + output_list.at(out_index));
      }

      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }

      GELOGI("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]",
             has_mem_type_attr ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index,
             peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}

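// Illustrative fusion case (hypothetical numbers): for an input whose
// ATTR_NAME_INPUT_MEM_TYPE_LIST entry is HBM, an original (intra-fusion) offset of 64
// and a peer output offset of 4096 combine to 4160; had the entry been RT_MEMORY_L1,
// the offset would stay 64, presumably because L1 addresses are local to the fusion
// scope rather than relative to the peer output.
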
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }

      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is a broadcast node and the preceding node is a variable, the InputOffset has already
      // been set in AssignVarAttr2Nodes, so the broadcast node's own InputOffset is used to update input_list.
      // Otherwise, the OutputOffset of the previous node is used to update input_list.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}

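// Note (dispatch summary of the overload above): input offsets are refreshed in three
// ways -- broadcast ops (HCOMBROADCAST / HVDCALLBACKBROADCAST) mix peer output
// offsets with their own pre-assigned variable offsets, Data ops defer to
// UpdateConstArgsOffset for parent/const aliasing, and every other op takes the
// two-argument UpdateOpInputOffset path.
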
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);

  // Parse the offset and size vectors: each section starts where the previous one ended.
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    // The size of section i is the gap between consecutive start offsets.
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();

  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }

      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) {
          GELOGE(FAILED, "Set atomic clean attr failed.");
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}

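// Illustrative derivation (hypothetical numbers): with atomic_mem_start = 512 and
// mem_offset_end = {1024, 1536}, the loop above produces
// memory_offset_start = {512, 1024} and memory_offset_size = {512, 512}; these
// (start, size) pairs are then handed to the ATOMICADDRCLEAN producer found through
// the node's incoming control edges.
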
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);

    std::vector<int64_t> mem_start_vector;
    // If GetListInt fails, mem_start_vector stays empty.
    (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);

    std::vector<int64_t> mem_size_vector;
    // If GetListInt fails, mem_size_vector stays empty.
    (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);

    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();

    GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
           node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
  }
  return SUCCESS;
}

void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size) {
  if (mem_align_size <= 0) {
    return;
  }
  memory_offset_[0].mem_offset_ =
      (memory_offset_[0].mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}

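// Worked example for the round-up above: with mem_align_size = 512 and
// mem_offset_ = 513, the expression evaluates to (513 + 511) / 512 * 512 = 1024,
// i.e. the offset is rounded up to the next multiple of the alignment (an already
// aligned value is left unchanged).
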
}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the front-end module ME and the underlying hardware and serves as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph-optimization passes, and finally outputs a graph that can run efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. GE is invoked automatically during model training/inference and is transparent to the user. GE consists of two main parts, GE API and GE Core; the detailed architecture diagram is shown below.