You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

davinci_model.h 36 kB

5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
  17. #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
  18. #include <map>
  19. #include <memory>
  20. #include <set>
  21. #include <string>
  22. #include <thread>
  23. #include <vector>
  24. #include "framework/common/ge_types.h"
  25. #include "framework/common/helper/model_helper.h"
  26. #include "framework/common/helper/om_file_helper.h"
  27. #include "common/opskernel/ge_task_info.h"
  28. #include "common/properties_manager.h"
  29. #include "common/dump/exception_dumper.h"
  30. #include "common/dump/opdebug_register.h"
  31. #include "framework/common/types.h"
  32. #include "framework/common/util.h"
  33. #include "graph/debug/ge_attr_define.h"
  34. #include "graph/load/model_manager/aipp_utils.h"
  35. #include "graph/load/model_manager/data_dumper.h"
  36. #include "graph/load/model_manager/data_inputer.h"
  37. #include "graph/load/model_manager/model_utils.h"
  38. #include "graph/load/model_manager/zero_copy_offset.h"
  39. #include "graph/load/model_manager/zero_copy_task.h"
  40. #include "graph/model.h"
  41. #include "graph/node.h"
  42. #include "graph/op_desc.h"
  43. #include "external/graph/operator.h"
  44. #include "graph/utils/attr_utils.h"
  45. #include "graph/utils/tensor_utils.h"
  46. #include "mmpa/mmpa_api.h"
  47. #include "proto/task.pb.h"
  48. #include "graph/load/model_manager/task_info/task_info.h"
  49. #include "common/local_context.h"
  50. using std::mutex;
  51. using std::thread;
  52. using std::multimap;
  53. namespace ge {
  54. // op debug need 2048 bits buffer
  55. const size_t kOpDebugMemorySize = 2048UL;
  56. const size_t kDebugP2pSize = 8UL;
  57. const size_t kDebugP2pSize = 8UL;
  58. const int64_t kInvalidDieId = -1;
// Profiling stages of one model run, reported through SetProfileTime().
// MODEL_LOAD_START begins at 1 so a zero value is never a valid stage.
typedef enum tagModelProcStage {
  MODEL_LOAD_START = 1,    // model load begins
  MODEL_LOAD_END,          // model load finished
  MODEL_PRE_PROC_START,    // input pre-processing begins
  MODEL_PRE_PROC_END,      // input pre-processing finished
  MODEL_INFER_START,       // inference begins
  MODEL_INFER_END,         // inference finished
  MODEL_AFTER_PROC_START,  // output post-processing begins
  MODEL_AFTER_PROC_END,    // output post-processing finished
  MODEL_PROC_INVALID,      // sentinel: not a real stage
} ModelProcStage;
// Timing record for one run of a model, keyed by modelId.
// Timestamp unit is not visible here — presumably microseconds; confirm
// against the code that fills these fields.
struct timeInfo {
  uint32_t modelId;           // id of the model these timestamps belong to
  int64_t processBeginTime;   // overall processing start
  int64_t processEndTime;     // overall processing end
  int64_t inferenceBeginTime; // inference start
  int64_t inferenceEndTime;   // inference end
  int64_t dumpBeginTime;      // data-dump start
  int64_t dumpEndTime;        // data-dump end
};
// For super kernel: state accumulated across fused kernel tasks.
// "last_*" fields describe the most recently recorded kernel task; the
// vectors collect one entry per fused kernel. Populated from KernelTaskInfo
// (see GetSuperKernelTaskInfo()).
struct SuperKernelTaskInfo {
  uint32_t last_block_dim;          // block dim of the last kernel task
  uint32_t last_args_size;          // size of the last kernel's args blob
  uint32_t last_task_id;            // runtime task id of the last kernel
  uint32_t last_stream_id;          // runtime stream id of the last kernel
  void *last_stream;                // stream handle of the last kernel
  void *last_sm_desc;               // SM descriptor of the last kernel
  vector<void *> kernel_list;       // handles of all fused kernels
  vector<void *> arg_list;          // args addresses, parallel to kernel_list
  vector<uint32_t> dump_flag_list;  // per-kernel dump flags
  vector<OpDescPtr> op_desc_list;   // per-kernel op descriptors
  vector<uintptr_t> dump_args_list; // per-kernel dump-args addresses
  uint32_t last_dump_flag;          // dump flag of the last kernel
  int64_t last_group_key;           // fusion group key of the last kernel
  uintptr_t last_dump_args;         // dump-args address of the last kernel
  OpDescPtr last_op;                // op descriptor of the last kernel
};
// Per-task memory footprint, in bytes, used for profiling (see ProfileInfo).
struct TaskMemInfo {
  int64_t input_size{0};     // total size of the task's inputs
  int64_t output_size{0};    // total size of the task's outputs
  int64_t weight_size{0};    // size of weights used by the task
  int64_t workspace_size{0}; // scratch/workspace size
  int64_t total_size{0};     // presumably the sum of the above — confirm
};
// Profiling summary for one fusion op: its fusion description, aggregated
// memory usage and the number of runtime tasks it expands into.
struct ProfileInfo {
  FusionOpInfo fusion_info; // fusion-op identity/description
  TaskMemInfo memory_info;  // aggregated memory usage (bytes)
  uint32_t task_count{0};   // number of tasks generated for this op
};
// Execution mode of a model run (cf. the async_mode flag of NnExecute).
enum ExecuteMode {
  INITIALIZATION,   // initial state — presumably "not executed yet"; confirm
  SYNCHRONIZATION,  // synchronous execution
  ASYNCHRONIZATION, // asynchronous execution
};
  114. // DavinciModel: in-memory representation of a loaded model; owns its streams,
  114. // events, labels, memory bases and task list, and drives load/execute/unload.
  115. class DavinciModel {
  116. public:
  117. ///
  118. /// @ingroup ge
  119. /// @brief DavinciModel constructor
  120. /// @author
  121. ///
  122. DavinciModel(int32_t priority, const shared_ptr<ModelListener> &listener);
  123. ///
  124. /// @ingroup ge
  125. /// @brief DavinciModel desctructor, free Parse and Init resources
  126. /// @author
  127. ///
  128. ~DavinciModel();
  129. ///
  130. /// @ingroup ge
  131. /// @brief apply model to model_def_
  132. ///
  133. Status Assign(const GeModelPtr &ge_model);
  134. ///
  135. /// @ingroup ge
  136. /// @brief DavinciModel initialization, including Stream, ccHandle, Event, DataInputer, etc
  137. /// @return execute result
  138. /// @author
  139. ///
  140. Status Init(void *dev_ptr = nullptr, size_t memsize = 0, void *weight_ptr = nullptr, size_t weightsize = 0);
  141. ///
  142. /// @ingroup ge
  143. /// @brief ACL case, Load task list with queue.
  144. /// @param [in] input_que_ids: input queue ids from user, nums equal Data Op.
  145. /// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op.
  146. /// @return: 0 for success / others for fail
  147. ///
  148. Status SetQueIds(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids);
  149. ///
  150. /// @ingroup ge
  151. /// @brief Get DataInputer
  152. /// @return model ID
  153. ///
  154. uint32_t Id() const { return model_id_; }
  155. ///
  156. /// @ingroup ge
  157. /// @brief Get DataInputer
  158. /// @return model ID
  159. ///
  160. void SetId(uint32_t model_id) { model_id_ = model_id; }
  161. ///
  162. /// @ingroup ge
  163. /// @brief Get SubModelId
  164. /// @return sub model ID
  165. ///
  166. uint32_t SubModelId() const { return sub_model_id_; }
  167. ///
  168. /// @ingroup ge
  169. /// @brief Get SubModelId
  170. /// @return sub model ID
  171. ///
  172. void SetSubModelId(uint32_t sub_model_id) { sub_model_id_ = sub_model_id; }
  173. static void *Run(DavinciModel *model_pointer);
  174. ///
  175. /// @ingroup ge
  176. /// @brief NnExecute
  177. /// @param [in] stream execute stream
  178. /// @param [in] async_mode is asynchronize mode.
  179. /// @param [in] input_data model input data
  180. /// @param [out] output_data model output data
  181. ///
  182. Status NnExecute(rtStream_t stream, bool async_mode, const InputData &input_data, OutputData &output_data);
  183. ///
  184. /// @ingroup ge
  185. /// @brief lock mutex run flag
  186. /// @author
  187. ///
  188. void LockRunFlg() { mux_run_flg_.lock(); }
  189. ///
  190. /// @ingroup ge
  191. /// @brief unlock mutex run flag
  192. /// @author
  193. ///
  194. void UnlockRunFlg() { mux_run_flg_.unlock(); }
  195. ///
  196. /// @ingroup ge
  197. /// @brief get DataInputer
  198. /// @return DataInputer pointer
  199. ///
  200. DataInputer *const GetDataInputer() const { return data_inputer_; }
  201. uint32_t GetDataInputerSize() {
  202. GE_CHECK_NOTNULL(data_inputer_);
  203. return data_inputer_->Size();
  204. }
  205. // get Stream number
  206. uint32_t StreamNum() const { return runtime_param_.stream_num; }
  207. // get Event number
  208. uint32_t EventNum() const { return runtime_param_.event_num; }
  209. // get Lable number
  210. uint32_t LabelNum() const { return runtime_param_.label_num; }
  211. // get batch number
  212. uint32_t BatchNum() const { return runtime_param_.batch_num; }
  213. // get session id
  214. uint64_t SessionId() const { return runtime_param_.session_id; }
  215. // get model priority
  216. int32_t Priority() const { return priority_; }
  217. // get total mem size
  218. size_t TotalMemSize() const { return runtime_param_.mem_size; }
  219. ///
  220. /// @ingroup ge
  221. /// @brief Get total useful size, in known subgraph, no need to allocate zero copy memory during initialization.
  222. /// @param [in] total_useful_size: total mem size - zero copy size.
  223. /// @return Status
  224. ///
  225. Status GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size);
  226. // model name
  227. string Name() const { return name_; }
  228. // om_name
  229. const string &OmName() const { return om_name_; }
  230. // dump_model_name
  231. const string &DumpModelName() const { return dump_model_name_; }
  232. // version
  233. uint32_t Version() const { return version_; }
  234. // get total weights mem size
  235. size_t TotalWeightsMemSize() const { return runtime_param_.weight_size; }
  236. size_t TotalVarMemSize() const { return runtime_param_.var_size; }
  237. // get base memory address
  238. uint8_t *MemBase() { return mem_base_; }
  239. // get weight base memory address
  240. uint8_t *WeightsMemBase() { return weights_mem_base_; }
  241. uint8_t *VarMemBase() { return var_mem_base_; }
  242. // get Event list
  243. const vector<rtEvent_t> &GetEventList() const { return event_list_; }
  244. const vector<rtStream_t> &GetStreamList() const { return stream_list_; }
  245. const vector<rtLabel_t> &GetLabelList() const { return label_list_; }
  246. Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);
  247. Status DestroyThread();
  248. // get Op
  249. OpDescPtr GetOpByIndex(uint32_t index) const {
  250. if (op_list_.find(index) == op_list_.end()) {
  251. return nullptr;
  252. }
  253. return op_list_.at(index);
  254. }
  255. void SetGlobalStep(void *global_step, uint64_t global_step_size);
  256. void *GetGlobalStep() const { return global_step_addr_; }
  257. // get task info for profiling
  258. const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; }
  259. // get updated task info list
  260. vector<TaskInfoPtr> GetTaskList() { return task_list_; }
  261. // Modified from KernelTaskInfo.
  262. SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }
  263. rtModel_t GetRtModelHandle() const { return rt_model_handle_; }
  264. rtStream_t GetRtModelStream() const { return rt_model_stream_; }
  265. uint64_t GetRtBaseAddr() const { return runtime_param_.logic_mem_base; }
  266. uint64_t GetRtWeightAddr() const { return runtime_param_.logic_weight_base; }
  267. uint64_t GetRtVarAddr() const { return runtime_param_.logic_var_base; }
  268. uint32_t GetFlowctrlIndex(uint32_t op_index);
  269. void PushHcclStream(rtStream_t value);
  270. bool IsBroadCastOpData(const NodePtr &var_node);
  271. ///
  272. /// @ingroup ge
  273. /// @brief For TVM Op, avoid Addr Reuse.
  274. /// @return void*
  275. ///
  276. const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = "");
  277. ///
  278. /// @ingroup ge
  279. /// @brief get model input and output desc info
  280. /// @param [out] input_shape model input size
  281. /// @param [out] output_shape model output size
  282. /// @return execute result
  283. ///
  284. Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc);
  285. Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc,
  286. vector<uint32_t> &input_formats, vector<uint32_t> &output_formats, bool by_dims);
  287. ///
  288. /// @ingroup ge
  289. /// @brief Get dynamic batch_info
  290. /// @param [out] batch_info
  291. /// @param [out] dynamic_type
  292. /// @return execute result
  293. ///
  294. Status GetDynamicBatchInfo(vector<vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
  295. ///
  296. /// @ingroup ge
  297. /// @brief Get combined dynamic dims info
  298. /// @param [out] batch_info
  299. /// @return None
  300. ///
  301. void GetCombinedDynamicDims(vector<vector<int64_t>> &batch_info) const;
  302. void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const;
  303. void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type) const;
  304. Status GetOpAttr(const std::string &op_name, const std::string &attr_name, std::string &attr_value) const;
  305. void GetModelAttr(vector<string> &dynamic_output_shape_info) const;
  306. ///
  307. /// @ingroup ge
  308. /// @brief Get AIPP input info
  309. /// @param [in] index
  310. /// @param [out] aipp_info
  311. /// @return execute result
  312. ///
  313. Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const;
  314. Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const;
  315. ///
  316. /// @ingroup ge
  317. /// @brief Get model_id.
  318. /// @return model_id
  319. ///
  320. uint32_t GetModelId() const { return model_id_; }
  321. ///
  322. /// @ingroup ge
  323. /// @brief get unique identification for op when load two or more models
  324. /// @param [in] op_desc : current op.
  325. /// @param [in] string identification: unique identification for current op.
  326. /// @return None
  327. ///
  328. void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification);
  329. Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);
  330. Status ReturnNoOutput(uint32_t data_id);
  331. Status ModelRunStart();
  332. ///
  333. /// @ingroup ge
  334. /// @brief stop run model
  335. /// @return Status
  336. ///
  337. Status ModelRunStop();
  338. ///
  339. /// @ingroup ge
  340. /// @brief model run flag
  341. /// @return Status
  342. ///
  343. bool RunFlag() const { return run_flg_; }
  344. ///
  345. /// @ingroup ge
  346. /// @brief Set Session Id
  347. /// @return void
  348. ///
  349. void SetSessionId(uint64_t session_id) { session_id_ = session_id; }
  350. ///
  351. /// @ingroup ge
  352. /// @brief Get Session Id
  353. /// @return sessionID
  354. ///
  355. uint64_t GetSessionId() const { return session_id_; }
  356. const struct error_message::Context &GetErrorContext() const { return error_context_; }
  357. ///
  358. /// @ingroup ge
  359. /// @brief SetDeviceId
  360. /// @return void
  361. ///
  362. void SetDeviceId(uint32_t device_id) { device_id_ = device_id; }
  363. void SetDieId(int64_t die_id) { die_id_ = die_id; }
  364. ///
  365. /// @ingroup ge
  366. /// @brief Get device Id
  367. /// @return device id
  368. ///
  369. uint32_t GetDeviceId() const {
  370. return die_id_ == kInvalidDieId ? device_id_ : die_id_;
  371. }
  372. int64_t GetDieId() const { return die_id_; }
  373. bool NeedDestroyAicpuKernel() const { return need_destroy_aicpu_kernel_; }
  374. Status UpdateSessionId(uint64_t session_id);
  375. const RuntimeParam &GetRuntimeParam() { return runtime_param_; }
  376. int32_t GetDataInputTid() const { return dataInputTid; }
  377. void SetDataInputTid(int32_t data_input_tid) { dataInputTid = data_input_tid; }
  378. void DisableZeroCopy(const void *addr);
  379. bool GetOpDugReg() const { return is_op_debug_reg_; }
  380. ///
  381. /// @ingroup ge
  382. /// @brief Save outside address of Data or NetOutput used info for ZeroCopy.
  383. /// @param [in] const OpDescPtr &op_desc: current op desc
  384. /// @param [in] const vector<void *> &outside_addrs: address of task
  385. /// @param [in] const void *args_offset: arguments address save the address.
  386. /// @return None.
  387. ///
  388. void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector<void *> &outside_addrs, const void *info, void *args,
  389. size_t size, size_t offset);
  390. void SetDynamicSize(const vector<uint64_t> &batch_num, int32_t dynamic_type);
  391. bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }
  392. void SetProfileTime(ModelProcStage stage, int64_t endTime = 0);
  393. int64_t GetLoadBeginTime() { return load_begin_time_; }
  394. int64_t GetLoadEndTime() { return load_end_time_; }
  395. void SaveSpecifyAttrValues(const OpDescPtr &op_desc);
  396. Status ReportProfilingData();
  397. void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
  398. exception_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
  399. }
  400. void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) {
  401. data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
  402. }
  403. Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const {
  404. return exception_dumper_.DumpExceptionInfo(exception_infos);
  405. }
  406. void DumperShrink() {
  407. data_dumper_.DumpShrink();
  408. }
  409. bool OpNeedDump(const string &op_name) {
  410. return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name);
  411. }
  412. bool ModelNeedDump();
  413. void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
  414. DavinciModel &operator=(const DavinciModel &model) = delete;
  415. DavinciModel(const DavinciModel &model) = delete;
  416. const map<int64_t, vector<rtStream_t>> &GetHcclFolowStream() {
  417. return main_follow_stream_mapping_;
  418. }
  419. void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream);
  420. void InitRuntimeParams();
  421. Status InitVariableMem();
  422. void UpdateMemBase(uint8_t *mem_base) {
  423. runtime_param_.mem_base = mem_base;
  424. mem_base_ = mem_base;
  425. }
  426. void SetTotalArgsSize(uint32_t args_size) { total_args_size_ += args_size; }
  427. uint32_t GetTotalArgsSize() { return total_args_size_; }
  428. void *GetCurrentArgsAddr(uint32_t offset) {
  429. void *cur_args = static_cast<char *>(args_) + offset;
  430. return cur_args;
  431. }
  432. void SetTotalIOAddrs(const vector<void *> &io_addrs);
  433. void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
  434. uint32_t GetHybridArgsSize() {
  435. return total_hybrid_args_size_;
  436. }
  437. void *GetCurrentHybridArgsAddr(uint32_t offset) {
  438. void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
  439. return cur_args;
  440. }
  441. void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
  442. int64_t GetFixedAddrsSize(string tensor_name);
  443. void *GetCurrentFixedAddr(int64_t offset) const {
  444. void *cur_addr = static_cast<char *>(fixed_addrs_) + offset;
  445. return cur_addr;
  446. }
  447. uint32_t GetFixedAddrOutputIndex(string tensor_name) {
  448. if (tensor_name_to_peer_output_index_.find(tensor_name) != tensor_name_to_peer_output_index_.end()) {
  449. return tensor_name_to_peer_output_index_[tensor_name];
  450. }
  451. return UINT32_MAX;
  452. }
  453. void SetKnownNode(bool known_node) { known_node_ = known_node; }
  454. bool IsKnownNode() { return known_node_; }
  455. Status MallocKnownArgs();
  456. Status CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const;
  457. Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
  458. Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
  459. Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
  460. Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
  461. Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
  462. vector<InputOutputDims> &output_dims) const;
  463. // om file name
  464. void SetOmName(const string &om_name) { om_name_ = om_name; }
  465. void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; }
  466. void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
  467. const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
  468. bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
  469. return exception_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
  470. }
  471. void UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const std::vector<void *> &io_addrs);
  472. bool GetRunningFlag() const { return running_flg_; }
  473. void SetRunningFlag(bool flag) { running_flg_ = flag; }
  474. Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
  475. // for blocking aicpu op
  476. Status GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event);
  477. Status GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id);
  478. private:
  479. // memory address of weights
  480. uint8_t *weights_mem_base_;
  481. uint8_t *var_mem_base_;
  482. // memory address of model
  483. uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever.
  484. uint8_t *mem_base_;
  485. bool is_inner_mem_base_;
  486. bool is_inner_weight_base_;
  487. // input data manager
  488. DataInputer *data_inputer_;
  489. int64_t load_begin_time_;
  490. int64_t load_end_time_;
  491. struct timeInfo time_info_;
  492. int32_t dataInputTid;
  493. void *GetRunAddress(void *addr) const;
  494. ///
  495. /// @ingroup ge
  496. /// @brief Copy Check input size and model op size.
  497. /// @param [in] const int64_t &input_size: input size.
  498. /// @param [in] const int64_t &op_size: model op size.
  499. /// @param [in] is_dynamic: dynamic batch input flag.
  500. /// @return true if success
  501. ///
  502. bool CheckUserAndModelSize(const int64_t &size, const int64_t &op_size, bool is_input, bool is_dynamic);
  503. ///
  504. /// @ingroup ge
  505. /// @brief Set copy only for No task feed NetOutput address.
  506. /// @return None.
  507. ///
  508. void SetCopyOnlyOutput();
  509. ///
  510. /// @ingroup ge
  511. /// @brief Copy Input/Output to model for direct use.
  512. /// @param [in] const InputData &input_data: user input data info.
  513. /// @param [in/out] OutputData &output_data: user output data info.
  514. /// @param [in] bool is_dynamic: whether is dynamic input, true: is dynamic input; false: not is dynamic input
  515. /// @return SUCCESS handle successfully / others handle failed
  516. ///
  517. Status CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic);
  518. ///
  519. /// @ingroup ge
  520. /// @brief Copy Data addr to model for direct use.
  521. /// @param [in] data_info: model memory addr/size map { data_index, { tensor_size, tensor_addr } }.
  522. /// @param [in] is_input: input data or output data
  523. /// @param [in] blobs: user input/output data list.
  524. /// @param [in] is_dynamic: whether is dynamic input, true: is dynamic input; false: not is dynamic input
  525. /// @param [in] batch_label: batch label for multi-batch scenes
  526. /// @return SUCCESS handle successfully / others handle failed
  527. ///
  528. Status UpdateIoTaskArgs(const map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
  529. const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label);
  530. Status CopyInputData(const InputData &input_data);
  531. Status CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind);
  532. Status SyncVarData();
  533. Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
  534. Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);
  535. void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, ShapeDescription &shape1, ShapeDescription &shape2);
  536. void SetInputDimsInfo(const vector<int64_t> &input_dims, Format &format, ShapeDescription &shape_info);
  537. Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats, bool by_dims) const;
  538. Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats) const;
  539. Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo);
  540. void UnbindHcomStream();
  541. Status DistributeTask();
  542. void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
  543. const domi::TaskDef &task_def, size_t task_index);
  544. uint8_t *MallocFeatureMapMem(size_t data_size);
  545. uint8_t *MallocWeightsMem(size_t weights_size);
  546. Status MallocExMem();
  547. void FreeFeatureMapMem();
  548. void FreeWeightsMem();
  549. void FreeExMem();
  550. void ReleaseTask();
  551. void ClearTaskAddrs();
  552. void UnbindTaskSinkStream();
  553. bool IsAicpuKernelConnectSpecifiedLayer();
  554. ///
  555. /// @ingroup ge
  556. /// @brief Reduce memory usage after task sink.
  557. /// @return: void
  558. ///
  559. void Shrink();
  560. ///
  561. /// @ingroup ge
  562. /// @brief Travel all nodes and do some init.
  563. /// @param [in] compute_graph: ComputeGraph to load.
  564. /// @return Status
  565. ///
  566. Status InitNodes(const ComputeGraphPtr &compute_graph);
  567. ///
  568. /// @ingroup ge
  569. /// @brief Data Op Initialize.
  570. /// @param [in] ComputeGraphPtr: root graph of the model.
  571. /// @param [in] NodePtr: Data Op.
  572. /// @param [in/out] data_op_index: index of courrent count.
  573. /// @param [in/out] data_by_index: Data ordered by index.
  574. /// @return Status
  575. ///
  576. Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
  577. map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs);
  578. ///
  579. /// @ingroup ge
  580. /// @brief Sort Data op list by index.
  581. /// @param [in] data_by_index: map of Data Op.
  582. /// @param [in] output_op_list: list of NetOutput op.
  583. /// @return Status
  584. ///
  585. Status GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list);
  586. ///
  587. /// @ingroup ge
  588. /// @brief NetOutput Op Initialize.
  589. /// @param [in] ComputeGraphPtr: root graph of the model.
  590. /// @param [in] NodePtr: NetOutput Op.
  591. /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
  592. /// @return Status
  593. ///
  594. Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list,
  595. set<const void *> &output_outside_addrs);
  596. ///
  597. /// @ingroup ge
  598. /// @brief Constant Op Init.
  599. /// @return Status
  600. ///
  601. Status InitConstant(const OpDescPtr &op_desc);
  602. Status InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name);
  603. /// @ingroup ge
  604. /// @brief LabelSet Op Initialize.
  605. /// @param [in] op_desc: LabelSet Op descriptor.
  606. /// @return Status
  607. Status InitLabelSet(const OpDescPtr &op_desc);
  608. Status InitStreamSwitch(const OpDescPtr &op_desc);
  609. Status InitStreamActive(const OpDescPtr &op_desc);
  610. Status InitStreamSwitchN(const OpDescPtr &op_desc);
  611. ///
  612. /// @ingroup ge
  613. /// @brief Case Op Init.
  614. /// @return Status
  615. ///
  616. Status InitCase(const OpDescPtr &op_desc);
  617. Status SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batch_num);
  618. ///
  619. /// @ingroup ge
  620. /// @brief TVM Op Init.
  621. /// @return Status
  622. ///
  623. Status InitTbeHandle(const OpDescPtr &op_desc);
  624. Status InitTbeHandleWithFfts(const OpDescPtr &op_desc);
  625. Status FunctionRegister(const OpDescPtr &op_desc, string &bin_file, OpKernelBinPtr &tbe_kernel, bool is_ffts,
  626. size_t thread_index = 0);
  627. Status InitBinaryMagic(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, rtDevBinary_t &binary);
  628. Status InitMetaData(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, void *bin_handle);
  629. Status InitKernelName(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, string &kernel_name);
  630. void StoreTbeHandle(const string &handle_key);
  631. void CleanTbeHandle();
  632. ///
  633. /// @ingroup ge
  634. /// @brief Make active stream list and bind to model.
  635. /// @return: 0 for success / others for fail
  636. ///
  637. Status BindModelStream();
  638. ///
  639. /// @ingroup ge
  640. /// @brief Init model stream for NN model.
  641. /// @return Status
  642. ///
  643. Status InitModelStream(rtStream_t stream);
  644. ///
  645. /// @ingroup ge
  646. /// @brief ACL, Load task list with queue entrance.
  647. /// @return: 0 for success / others for fail
  648. ///
  649. Status LoadWithQueue();
  650. ///
  651. /// @ingroup ge
  652. /// @brief ACL, Bind Data Op addr to input queue.
  653. /// @return: 0 for success / others for fail
  654. ///
  655. Status BindInputQueue();
  656. Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);
  657. ///
  658. /// @ingroup ge
  659. /// @brief ACL, Bind NetOutput Op addr to output queue.
  660. /// @return: 0 for success / others for fail
  661. ///
  662. Status BindOutputQueue();
  663. Status CpuModelPrepareOutput(uintptr_t addr, uint32_t size);
  664. ///
  665. /// @ingroup ge
  666. /// @brief deterministic queue schedule, bind input queue to task.
  667. /// @param [in] queue_id: input queue id from user.
  670. /// @return: 0 for success / others for fail
  671. ///
  672. Status CpuModelDequeue(uint32_t queue_id);
  673. ///
  674. /// @ingroup ge
  675. /// @brief deterministic queue schedule, bind output queue to task.
  676. /// @param [in] queue_id: output queue id from user.
  677. /// @param [in] addr: NetOutput Op input tensor address.
  678. /// @param [in] size: NetOutput Op input tensor size.
  679. /// @return: 0 for success / others for fail
  680. ///
  681. Status CpuModelEnqueue(uint32_t queue_id, uintptr_t addr, uint32_t size);
  682. ///
  683. /// @ingroup ge
  684. /// @brief deterministic queue schedule, activate original model stream.
  685. /// @return: 0 for success / others for fail
  686. ///
  687. Status CpuActiveStream();
  688. ///
  689. /// @ingroup ge
  690. /// @brief deterministic queue schedule, wait for end graph.
  691. /// @return: 0 for success / others for fail
  692. ///
  693. Status CpuWaitEndGraph();
  694. Status BindEnqueue();
  695. Status CpuModelEnqueue(uint32_t queue_id, uintptr_t out_mbuf);
  696. ///
  697. /// @ingroup ge
  698. /// @brief deterministic queue schedule, repeat run model.
  699. /// @return: 0 for success / others for fail
  700. ///
  701. Status CpuModelRepeat();
  702. Status InitEntryTask();
  703. Status AddHeadStream();
  704. ///
  705. /// @ingroup ge
  706. /// @brief set ts device.
  707. /// @return: 0 for success / others for fail
  708. ///
  709. Status SetTSDevice();
  710. Status OpDebugRegister();
  711. void OpDebugUnRegister();
  712. void CheckHasHcomOp(const ComputeGraphPtr &graph);
  713. Status DoTaskSink();
  714. void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);
  715. Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);
  716. void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);
  717. Status InitL1DataDumperArgs();
  718. Status InitModelProfile();
  719. Status SinkModelProfile();
  720. Status SinkTimeProfile(const InputData &current_data);
  721. Status InitOutputTensorInfo(const OpDescPtr &op_desc);
  722. Status GenOutputTensorInfo(OutputData *output_data, vector<ge::Tensor> &outputs);
  723. Status InitInputDescInfo(const OpDescPtr &op_desc);
  724. Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name);
  725. Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc);
  726. Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc);
  727. Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list);
  728. Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc);
  729. void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info);
  730. void SetLabelForDynamic(const NodePtr &node);
  731. void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info);
  732. bool IsGetNextSinkDynamic(const OpDescPtr &op_desc);
  733. Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node);
  734. void GetAllGearsInfo(const NodePtr &node);
  735. Status GetGetDynamicDimsNodeInfo(const NodePtr &node);
  736. Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node);
  737. Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node);
  738. Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node);
  739. bool is_weight_mem_has_inited_;
  740. bool is_feature_map_mem_has_inited_;
  741. uint32_t model_id_;
  742. uint32_t runtime_model_id_;
  743. uint32_t sub_model_id_ = 0;
  744. string name_;
  745. // used for inference data dump
  746. string om_name_;
  747. string dump_model_name_;
  748. uint32_t version_;
  749. GeModelPtr ge_model_; // release after DavinciModel::Init
  750. bool need_destroy_aicpu_kernel_{false};
  751. map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init
  752. map<string, GeTensorDesc> broadcast_variable_;
  753. void *global_step_addr_{nullptr};
  754. uint64_t global_step_size_{0};
  755. map<uint32_t, ZeroCopyOffset> input_data_info_;
  756. map<uint32_t, ZeroCopyOffset> output_data_info_;
  757. set<const void *> real_virtual_addrs_;
  758. // output op: save cce op actual needed memory size
  759. vector<int64_t> output_memory_size_list_;
  760. thread thread_id_;
  761. shared_ptr<ModelListener> listener_;
  762. bool run_flg_;
  763. // check whether model is running with data
  764. bool running_flg_ = false;
  765. mutex mux_run_flg_;
  766. int32_t priority_;
  767. vector<rtStream_t> stream_list_;
  768. mutex all_hccl_stream_list_mutex_;
  769. vector<rtStream_t> all_hccl_stream_list_;
  770. // for reuse hccl_follow_stream
  771. mutex capacity_of_stream_mutex_;
  772. map<int64_t, vector<rtStream_t>> main_follow_stream_mapping_;
  773. vector<rtEvent_t> event_list_;
  774. vector<rtLabel_t> label_list_;
  775. set<uint32_t> label_id_indication_;
  776. mutex label_args_mutex_;
  777. map<uint32_t, pair<void *, uint32_t>> label_goto_args_;
  778. mutex outside_addrs_mutex_;
  779. vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
  780. set<const void *> copy_only_addrs_; // Address need copy to original place.
  781. vector<TaskInfoPtr> task_list_;
  782. // rt_model_handle
  783. rtModel_t rt_model_handle_;
  784. rtStream_t rt_model_stream_;
  785. bool is_inner_model_stream_;
  786. bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_.
  787. ExecuteMode last_execute_mode_;
  788. bool is_stream_list_bind_{false};
  789. bool is_pure_head_stream_{false};
  790. rtStream_t rt_head_stream_{nullptr};
  791. rtStream_t rt_entry_stream_{nullptr};
  792. rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED};
  793. // ACL queue schedule, save queue ids for Init.
  794. vector<TaskInfoPtr> cpu_task_list_;
  795. vector<uint32_t> input_queue_ids_; // input queue ids created by caller.
  796. vector<uint32_t> output_queue_ids_; // output queue ids created by caller.
  797. vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task.
  798. vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.
  799. uint64_t session_id_;
  800. struct error_message::Context error_context_;
  801. uint32_t device_id_;
  802. int64_t die_id_ = kInvalidDieId;
  803. mutex flowctrl_op_index_internal_map_mutex_;
  804. map<uint32_t, uint32_t> flowctrl_op_index_internal_map_;
  805. vector<rtStream_t> active_stream_list_;
  806. set<uint32_t> active_stream_indication_;
  807. set<uint32_t> hcom_streams_;
  808. RuntimeParam runtime_param_;
  809. static mutex tvm_bin_mutex_;
  810. set<string> tvm_bin_kernel_;
  811. map<string, uint32_t> used_tbe_handle_map_;
  812. // for profiling task and graph info
  813. vector<TaskDescInfo> task_desc_info_;
  814. std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;
  815. int64_t maxDumpOpNum_;
  816. // for data dump
  817. DataDumper data_dumper_;
  818. ExceptionDumper exception_dumper_;
  819. OpdebugRegister opdebug_register_;
  820. uint64_t iterator_count_;
  821. bool is_l1_fusion_enable_;
  822. map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init
  823. void *l1_fusion_addr_ = nullptr;
  824. bool known_node_ = false;
  825. uint32_t total_args_size_ = 0;
  826. void *args_ = nullptr;
  827. void *args_host_ = nullptr;
  828. void *fixed_addrs_ = nullptr;
  829. void *hybrid_addrs_ = nullptr;
  830. uint32_t total_hybrid_args_size_ = 0;
  831. int64_t total_fixed_addr_size_ = 0;
  832. map<const void *, void *> known_input_data_info_;
  833. map<const void *, void *> known_output_data_info_;
  834. vector<void *> total_io_addrs_;
  835. vector<vector<int64_t>> batch_info_;
  836. vector<vector<int64_t>> combined_batch_info_;
  837. vector<string> user_designate_shape_order_;
  838. int32_t dynamic_type_ = 0;
  839. bool is_dynamic_ = false;
  840. vector<uint64_t> batch_size_;
  841. // key: input tensor name, generally rts op;
  842. // value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op
  843. map<string, int64_t> tensor_name_to_fixed_addr_size_;
  844. // key: input tensor name, generally rts op; value: the peer output anchor of the peer op
  845. map<string, int64_t> tensor_name_to_peer_output_index_;
  846. // if model is first execute
  847. bool is_first_execute_;
  848. // for op debug
  849. mutex debug_reg_mutex_;
  850. bool is_op_debug_reg_ = false;
  851. bool is_online_infer_dynamic_ = false;
  852. bool is_getnext_sink_dynamic_ = false;
  853. vector<int32_t> cur_dynamic_dims_;
  854. void *netoutput_last_input_addr_ = nullptr;
  855. int64_t netoutput_last_input_size_ = 0;
  856. size_t shape_of_cur_dynamic_dims_ = 0;
  857. // key: input_index: input is merge node; value: each gear info and each output size
  858. map<size_t, map<vector<int32_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
  859. // key: input_index: input is merge node; value: each gear info and each output shape
  860. map<size_t, map<vector<int32_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
  861. vector<vector<int32_t>> all_gears_info_;
  862. multimap<uint32_t, uint32_t> op_id_map_;
  863. vector<ProfileInfo> profile_list_;
  864. // For super kernel.
  865. SuperKernelTaskInfo skt_info_;
  866. bool has_output_node_ = false;
  867. bool is_dynamic_aipp_ = false;
  868. vector<string> dynamic_output_shape_info_;
  869. vector<vector<void *>> input_addrs_list_;
  870. vector<vector<void *>> output_addrs_list_;
  871. vector<int64_t> output_buffer_size_;
  872. vector<GeShape> output_shape_info_;
  873. map<uint32_t, OriginInputInfo> orig_input_info_;
  874. map<uint32_t, AippConfigInfo> aipp_info_list_;
  875. map<uint32_t, pair<InputAippType, size_t>> aipp_type_list_;
  876. map<uint32_t, pair<vector<InputOutputDims>, vector<InputOutputDims>>> aipp_dims_info_;
  877. vector<InputOutputDescInfo> input_descs_;
  878. vector<InputOutputDescInfo> input_descs_dims_;
  879. vector<uint32_t> input_formats_;
  880. vector<InputOutputDescInfo> output_descs_;
  881. vector<uint32_t> output_formats_;
  882. // op name to attrs mapping
  883. std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_;
  884. std::map<rtStream_t, rtEvent_t> stream_2_event_;
  885. };
  886. } // namespace ge
  887. #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示