You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, may include dashes ('-'), and can be up to 35 characters long.

davinci_model.h 35 kB

5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
  17. #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
  18. #include <map>
  19. #include <memory>
  20. #include <set>
  21. #include <string>
  22. #include <thread>
  23. #include <vector>
  24. #include "common/ge_types.h"
  25. #include "common/helper/model_helper.h"
  26. #include "common/helper/om_file_helper.h"
  27. #include "common/opskernel/ge_task_info.h"
  28. #include "common/properties_manager.h"
  29. #include "common/dump/exception_dumper.h"
  30. #include "common/dump/opdebug_register.h"
  31. #include "common/types.h"
  32. #include "framework/common/util.h"
  33. #include "graph/debug/ge_attr_define.h"
  34. #include "graph/load/model_manager/aipp_utils.h"
  35. #include "graph/load/model_manager/data_dumper.h"
  36. #include "graph/load/model_manager/data_inputer.h"
  37. #include "graph/load/model_manager/model_utils.h"
  38. #include "graph/load/model_manager/zero_copy_offset.h"
  39. #include "graph/load/model_manager/zero_copy_task.h"
  40. #include "graph/model.h"
  41. #include "graph/node.h"
  42. #include "graph/op_desc.h"
  43. #include "graph/operator.h"
  44. #include "graph/utils/attr_utils.h"
  45. #include "graph/utils/tensor_utils.h"
  46. #include "mmpa/mmpa_api.h"
  47. #include "proto/task.pb.h"
  48. #include "task_info/task_info.h"
  49. #include "graph/common/local_context.h"
  50. using std::mutex;
  51. using std::thread;
  52. using std::multimap;
  53. namespace ge {
  54. // op debug need 2048 bits buffer
  55. const size_t kOpDebugMemorySize = 2048UL;
  56. const size_t kDebugP2pSize = 8UL;
  57. typedef enum tagModelProcStage {
  58. MODEL_LOAD_START = 1,
  59. MODEL_LOAD_END,
  60. MODEL_PRE_PROC_START,
  61. MODEL_PRE_PROC_END,
  62. MODEL_INFER_START,
  63. MODEL_INFER_END,
  64. MODEL_AFTER_PROC_START,
  65. MODEL_AFTER_PROC_END,
  66. MODEL_PROC_INVALID,
  67. } ModelProcStage;
  68. struct timeInfo {
  69. uint32_t modelId;
  70. int64_t processBeginTime;
  71. int64_t processEndTime;
  72. int64_t inferenceBeginTime;
  73. int64_t inferenceEndTime;
  74. int64_t dumpBeginTime;
  75. int64_t dumpEndTime;
  76. };
  77. // For super kernel
  78. struct SuperKernelTaskInfo {
  79. uint32_t last_block_dim;
  80. uint32_t last_args_size;
  81. uint32_t last_task_id;
  82. uint32_t last_stream_id;
  83. void *last_stream;
  84. void *last_sm_desc;
  85. vector<void *> kernel_list;
  86. vector<void *> arg_list;
  87. vector<uint32_t> dump_flag_list;
  88. vector<OpDescPtr> op_desc_list;
  89. vector<uintptr_t> dump_args_list;
  90. uint32_t last_dump_flag;
  91. int64_t last_group_key;
  92. uintptr_t last_dump_args;
  93. OpDescPtr last_op;
  94. };
  95. struct TaskMemInfo {
  96. int64_t input_size{0};
  97. int64_t output_size{0};
  98. int64_t weight_size{0};
  99. int64_t workspace_size{0};
  100. int64_t total_size{0};
  101. };
  102. struct ProfileInfo {
  103. FusionOpInfo fusion_info;
  104. TaskMemInfo memory_info;
  105. uint32_t task_count{0};
  106. };
  107. enum ExecuteMode {
  108. INITIALIZATION,
  109. SYNCHRONIZATION,
  110. ASYNCHRONIZATION,
  111. };
  112. // comments
  113. class DavinciModel {
  114. public:
  115. ///
  116. /// @ingroup ge
  117. /// @brief DavinciModel constructor
  118. /// @author
  119. ///
  120. DavinciModel(int32_t priority, const shared_ptr<ModelListener> &listener);
  121. ///
  122. /// @ingroup ge
  123. /// @brief DavinciModel desctructor, free Parse and Init resources
  124. /// @author
  125. ///
  126. ~DavinciModel();
  127. ///
  128. /// @ingroup ge
  129. /// @brief apply model to model_def_
  130. ///
  131. Status Assign(const GeModelPtr &ge_model);
  132. ///
  133. /// @ingroup ge
  134. /// @brief DavinciModel initialization, including Stream, ccHandle, Event, DataInputer, etc
  135. /// @return execute result
  136. /// @author
  137. ///
  138. Status Init(void *dev_ptr = nullptr, size_t memsize = 0, void *weight_ptr = nullptr, size_t weightsize = 0);
  139. ///
  140. /// @ingroup ge
  141. /// @brief ACL case, Load task list with queue.
  142. /// @param [in] input_que_ids: input queue ids from user, nums equal Data Op.
  143. /// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op.
  144. /// @return: 0 for success / others for fail
  145. ///
  146. Status SetQueIds(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids);
  147. ///
  148. /// @ingroup ge
  149. /// @brief Get DataInputer
  150. /// @return model ID
  151. ///
  152. uint32_t Id() const { return model_id_; }
  153. ///
  154. /// @ingroup ge
  155. /// @brief Get DataInputer
  156. /// @return model ID
  157. ///
  158. void SetId(uint32_t model_id) { model_id_ = model_id; }
  159. ///
  160. /// @ingroup ge
  161. /// @brief Get SubModelId
  162. /// @return sub model ID
  163. ///
  164. uint32_t SubModelId() const { return sub_model_id_; }
  165. ///
  166. /// @ingroup ge
  167. /// @brief Get SubModelId
  168. /// @return sub model ID
  169. ///
  170. void SetSubModelId(uint32_t sub_model_id) { sub_model_id_ = sub_model_id; }
  171. static void *Run(DavinciModel *model_pointer);
  172. ///
  173. /// @ingroup ge
  174. /// @brief NnExecute
  175. /// @param [in] stream execute stream
  176. /// @param [in] async_mode is asynchronize mode.
  177. /// @param [in] input_data model input data
  178. /// @param [out] output_data model output data
  179. ///
  180. Status NnExecute(rtStream_t stream, bool async_mode, const InputData &input_data, OutputData &output_data);
  181. ///
  182. /// @ingroup ge
  183. /// @brief lock mutex run flag
  184. /// @author
  185. ///
  186. void LockRunFlg() { mux_run_flg_.lock(); }
  187. ///
  188. /// @ingroup ge
  189. /// @brief unlock mutex run flag
  190. /// @author
  191. ///
  192. void UnlockRunFlg() { mux_run_flg_.unlock(); }
  193. ///
  194. /// @ingroup ge
  195. /// @brief get DataInputer
  196. /// @return DataInputer pointer
  197. ///
  198. DataInputer *const GetDataInputer() const { return data_inputer_; }
// Number of entries currently queued in the DataInputer.
// NOTE(review): GE_CHECK_NOTNULL conventionally returns a Status on a null
// argument, while this function returns uint32_t — relies on the macro's
// expansion being compatible; verify against the macro definition.
uint32_t GetDataInputerSize() {
  GE_CHECK_NOTNULL(data_inputer_);
  return data_inputer_->Size();
}
  203. // get Stream number
  204. uint32_t StreamNum() const { return runtime_param_.stream_num; }
  205. // get Event number
  206. uint32_t EventNum() const { return runtime_param_.event_num; }
  207. // get Lable number
  208. uint32_t LabelNum() const { return runtime_param_.label_num; }
  209. // get batch number
  210. uint32_t BatchNum() const { return runtime_param_.batch_num; }
  211. // get session id
  212. uint64_t SessionId() const { return runtime_param_.session_id; }
  213. // get model priority
  214. int32_t Priority() const { return priority_; }
  215. // get total mem size
  216. size_t TotalMemSize() const { return runtime_param_.mem_size; }
  217. const map<uint32_t, MemInfo> &P2PMemInfos() const { return runtime_param_.memory_infos; }
  218. // model name
  219. string Name() const { return name_; }
  220. // om_name
  221. const string &OmName() const { return om_name_; }
  222. // dump_model_name
  223. const string &DumpModelName() const { return dump_model_name_; }
  224. // version
  225. uint32_t Version() const { return version_; }
  226. // get total weights mem size
  227. size_t TotalWeightsMemSize() const { return runtime_param_.weight_size; }
  228. size_t TotalVarMemSize() const { return runtime_param_.var_size; }
  229. // get base memory address
  230. uint8_t *MemBase() { return mem_base_; }
  231. // get weight base memory address
  232. uint8_t *WeightsMemBase() { return weights_mem_base_; }
  233. uint8_t *VarMemBase() { return var_mem_base_; }
  234. // get Event list
  235. const vector<rtEvent_t> &GetEventList() const { return event_list_; }
  236. const vector<rtStream_t> &GetStreamList() const { return stream_list_; }
  237. const vector<rtLabel_t> &GetLabelList() const { return label_list_; }
  238. Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);
  239. Status DestroyThread();
  240. // get Op
  241. OpDescPtr GetOpByIndex(uint32_t index) const {
  242. if (op_list_.find(index) == op_list_.end()) {
  243. return nullptr;
  244. }
  245. return op_list_.at(index);
  246. }
  247. void *GetGlobalStep() const { return global_step_addr_; }
  248. // get task info for profiling
  249. const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; }
  250. // get updated task info list
  251. vector<TaskInfoPtr> GetTaskList() { return task_list_; }
  252. // Modified from KernelTaskInfo.
  253. SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }
  254. rtModel_t GetRtModelHandle() const { return rt_model_handle_; }
  255. rtStream_t GetRtModelStream() const { return rt_model_stream_; }
  256. uint64_t GetRtBaseAddr() const { return runtime_param_.logic_mem_base; }
  257. uint64_t GetRtWeightAddr() const { return runtime_param_.logic_weight_base; }
  258. uint64_t GetRtVarAddr() const { return runtime_param_.logic_var_base; }
  259. uint32_t GetFlowctrlIndex(uint32_t op_index);
  260. void PushHcclStream(rtStream_t value);
  261. bool IsBroadCastOpData(const NodePtr &var_node);
  262. ///
  263. /// @ingroup ge
  264. /// @brief For TVM Op, avoid Addr Reuse.
  265. /// @return void*
  266. ///
  267. const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = "");
  268. ///
  269. /// @ingroup ge
  270. /// @brief get model input and output desc info
  271. /// @param [out] input_shape model input size
  272. /// @param [out] output_shape model output size
  273. /// @return execute result
  274. ///
  275. Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc);
  276. Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc,
  277. vector<uint32_t> &input_formats, vector<uint32_t> &output_formats, bool by_dims);
  278. ///
  279. /// @ingroup ge
  280. /// @brief Get dynamic batch_info
  281. /// @param [out] batch_info
  282. /// @param [out] dynamic_type
  283. /// @return execute result
  284. ///
  285. Status GetDynamicBatchInfo(vector<vector<int64_t>> &batch_info, int32_t &dynamic_type) const;
  286. ///
  287. /// @ingroup ge
  288. /// @brief Get combined dynamic dims info
  289. /// @param [out] batch_info
  290. /// @return None
  291. ///
  292. void GetCombinedDynamicDims(vector<vector<int64_t>> &batch_info) const;
  293. void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const;
  294. void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type) const;
  295. Status GetOpAttr(const std::string &op_name, const std::string &attr_name, std::string &attr_value) const;
  296. void GetModelAttr(vector<string> &dynamic_output_shape_info) const;
  297. ///
  298. /// @ingroup ge
  299. /// @brief Get AIPP input info
  300. /// @param [in] index
  301. /// @param [out] aipp_info
  302. /// @return execute result
  303. ///
  304. Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const;
  305. Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const;
  306. ///
  307. /// @ingroup ge
  308. /// @brief Get model_id.
  309. /// @return model_id
  310. ///
  311. uint32_t GetModelId() const { return model_id_; }
  312. ///
  313. /// @ingroup ge
  314. /// @brief get unique identification for op when load two or more models
  315. /// @param [in] op_desc : current op.
  316. /// @param [in] string identification: unique identification for current op.
  317. /// @return None
  318. ///
  319. void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification);
  320. Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);
  321. Status ReturnNoOutput(uint32_t data_id);
  322. Status ModelRunStart();
  323. ///
  324. /// @ingroup ge
  325. /// @brief stop run model
  326. /// @return Status
  327. ///
  328. Status ModelRunStop();
  329. ///
  330. /// @ingroup ge
  331. /// @brief model run flag
  332. /// @return Status
  333. ///
  334. bool RunFlag() const { return run_flg_; }
  335. ///
  336. /// @ingroup ge
  337. /// @brief Set Session Id
  338. /// @return void
  339. ///
  340. void SetSessionId(uint64_t session_id) { session_id_ = session_id; }
  341. ///
  342. /// @ingroup ge
  343. /// @brief Get Session Id
  344. /// @return sessionID
  345. ///
  346. uint64_t GetSessionId() const { return session_id_; }
  347. const struct error_message::Context &GetErrorContext() const { return error_context_; }
  348. ///
  349. /// @ingroup ge
  350. /// @brief SetDeviceId
  351. /// @return void
  352. ///
  353. void SetDeviceId(uint32_t device_id) { device_id_ = device_id; }
  354. ///
  355. /// @ingroup ge
  356. /// @brief Get device Id
  357. /// @return device id
  358. ///
  359. uint32_t GetDeviceId() const { return device_id_; }
  360. bool NeedDestroyAicpuKernel() const { return need_destroy_aicpu_kernel_; }
  361. Status UpdateSessionId(uint64_t session_id);
  362. const RuntimeParam &GetRuntimeParam() { return runtime_param_; }
  363. int32_t GetDataInputTid() const { return dataInputTid; }
  364. void SetDataInputTid(int32_t data_input_tid) { dataInputTid = data_input_tid; }
  365. void DisableZeroCopy(const void *addr);
  366. bool GetOpDugReg() const { return is_op_debug_reg_; }
  367. ///
  368. /// @ingroup ge
  369. /// @brief Save outside address of Data or NetOutput used info for ZeroCopy.
  370. /// @param [in] const OpDescPtr &op_desc: current op desc
  371. /// @param [in] const vector<void *> &outside_addrs: address of task
  372. /// @param [in] const void *args_offset: arguments address save the address.
  373. /// @return None.
  374. ///
  375. void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector<void *> &outside_addrs, const void *info, void *args,
  376. size_t size, size_t offset);
  377. void SetDynamicSize(const vector<uint64_t> &batch_num, int32_t dynamic_type);
  378. bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }
  379. void SetProfileTime(ModelProcStage stage, int64_t endTime = 0);
  380. int64_t GetLoadBeginTime() { return load_begin_time_; }
  381. int64_t GetLoadEndTime() { return load_end_time_; }
  382. void SaveSpecifyAttrValues(const OpDescPtr &op_desc);
  383. Status ReportProfilingData();
// Forward op/task identifiers to the exception dumper so the op's info can be
// recovered if an exception is later reported for (task_id, stream_id).
void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
  exception_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
}
  387. void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) {
  388. data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
  389. }
  390. Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const {
  391. return exception_dumper_.DumpExceptionInfo(exception_infos);
  392. }
  393. void SetKnownShapeGlobalStep(void *global_step) {
  394. known_shape_global_step_ = global_step;
  395. }
  396. void DumperShrink() {
  397. data_dumper_.DumpShrink();
  398. }
  399. bool OpNeedDump(const string &op_name) {
  400. return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name);
  401. }
  402. bool ModelNeedDump();
  403. void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
  404. DavinciModel &operator=(const DavinciModel &model) = delete;
  405. DavinciModel(const DavinciModel &model) = delete;
// Mapping of main stream id -> HCCL follow streams saved via
// SaveHcclFollowStream().
// NOTE(review): "Folow" is a typo in the public name; kept as-is because
// renaming would break existing callers.
const map<int64_t, vector<rtStream_t>> &GetHcclFolowStream() {
  return main_follow_stream_mapping_;
}
  409. void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream);
  410. void InitRuntimeParams();
  411. Status InitVariableMem();
  412. void UpdateMemBase(uint8_t *mem_base) {
  413. runtime_param_.mem_base = mem_base;
  414. mem_base_ = mem_base;
  415. }
  416. void SetTotalArgsSize(uint32_t args_size) { total_args_size_ += args_size; }
  417. uint32_t GetTotalArgsSize() { return total_args_size_; }
  418. void *GetCurrentArgsAddr(uint32_t offset) {
  419. void *cur_args = static_cast<char *>(args_) + offset;
  420. return cur_args;
  421. }
  422. void SetTotalIOAddrs(const vector<void *> &io_addrs);
  423. void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
  424. uint32_t GetHybridArgsSize() {
  425. return total_hybrid_args_size_;
  426. }
  427. void *GetCurrentHybridArgsAddr(uint32_t offset) {
  428. void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
  429. return cur_args;
  430. }
  431. void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
  432. int64_t GetFixedAddrsSize(string tensor_name);
  433. void *GetCurrentFixedAddr(int64_t offset) const {
  434. void *cur_addr = static_cast<char *>(fixed_addrs_) + offset;
  435. return cur_addr;
  436. }
  437. uint32_t GetFixedAddrOutputIndex(string tensor_name) {
  438. if (tensor_name_to_peer_output_index_.find(tensor_name) != tensor_name_to_peer_output_index_.end()) {
  439. return tensor_name_to_peer_output_index_[tensor_name];
  440. }
  441. return UINT32_MAX;
  442. }
  443. void SetKnownNode(bool known_node) { known_node_ = known_node; }
  444. bool IsKnownNode() { return known_node_; }
  445. Status MallocKnownArgs();
  446. Status CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const;
  447. Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
  448. Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
  449. Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
  450. Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
  451. Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
  452. vector<InputOutputDims> &output_dims) const;
  453. // om file name
  454. void SetOmName(const string &om_name) { om_name_ = om_name; }
  455. void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; }
  456. void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
  457. const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
  458. bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
  459. return exception_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
  460. }
  461. void UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const std::vector<void *> &io_addrs);
  462. bool GetRunningFlag() const { return running_flg_; }
  463. void SetRunningFlag(bool flag) { running_flg_ = flag; }
  464. Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
  465. private:
  466. // memory address of weights
  467. uint8_t *weights_mem_base_;
  468. uint8_t *var_mem_base_;
  469. // memory address of model
  470. uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever.
  471. uint8_t *mem_base_;
  472. uint8_t *p2p_mem_base_;
  473. bool is_inner_mem_base_;
  474. bool is_inner_weight_base_;
  475. bool is_inner_p2p_mem_base_;
  476. // input data manager
  477. DataInputer *data_inputer_;
  478. int64_t load_begin_time_;
  479. int64_t load_end_time_;
  480. struct timeInfo time_info_;
  481. int32_t dataInputTid;
  482. void *GetRunAddress(void *addr) const;
  483. ///
  484. /// @ingroup ge
  485. /// @brief Copy Check input size and model op size.
  486. /// @param [in] const int64_t &input_size: input size.
  487. /// @param [in] const int64_t &op_size: model op size.
  488. /// @param [in] is_dynamic: dynamic batch input flag.
  489. /// @return true if success
  490. ///
  491. bool CheckInputAndModelSize(const int64_t &input_size, const int64_t &op_size, bool is_dynamic);
  492. ///
  493. /// @ingroup ge
  494. /// @brief Set copy only for No task feed NetOutput address.
  495. /// @return None.
  496. ///
  497. void SetCopyOnlyOutput();
  498. ///
  499. /// @ingroup ge
  500. /// @brief Copy Input/Output to model for direct use.
  501. /// @param [in] const InputData &input_data: user input data info.
  502. /// @param [in/out] OutputData &output_data: user output data info.
  503. /// @param [in] bool is_dynamic: whether is dynamic input, true: is dynamic input; false: not is dynamic input
  504. /// @return SUCCESS handle successfully / others handle failed
  505. ///
  506. Status CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic);
  507. ///
  508. /// @ingroup ge
  509. /// @brief Copy Data addr to model for direct use.
  510. /// @param [in] data_info: model memory addr/size map { data_index, { tensor_size, tensor_addr } }.
  511. /// @param [in] is_input: input data or output data
  512. /// @param [in] blobs: user input/output data list.
  513. /// @param [in] is_dynamic: whether is dynamic input, true: is dynamic input; false: not is dynamic input
  514. /// @param [in] batch_label: batch label for multi-batch scenes
  515. /// @return SUCCESS handle successfully / others handle failed
  516. ///
  517. Status UpdateIoTaskArgs(const map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
  518. const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label);
  519. Status CopyInputData(const InputData &input_data);
  520. Status CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind);
  521. Status SyncVarData();
  522. Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
  523. Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);
  524. void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, ShapeDescription &shape1, ShapeDescription &shape2);
  525. void SetInputDimsInfo(const vector<int64_t> &input_dims, Format &format, ShapeDescription &shape_info);
  526. Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats, bool by_dims) const;
  527. Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats) const;
  528. Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo);
  529. void UnbindHcomStream();
  530. Status DistributeTask();
  531. void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
  532. const domi::TaskDef &task_def, size_t task_index);
  533. uint8_t *MallocFeatureMapMem(size_t data_size);
  534. uint8_t *MallocWeightsMem(size_t weights_size);
  535. uint8_t *MallocP2PMem(size_t p2p_data_size);
  536. void FreeFeatureMapMem();
  537. void FreeWeightsMem();
  538. void FreeP2PMem();
  539. void ReleaseTask();
  540. void ClearTaskAddrs();
  541. void UnbindTaskSinkStream();
  542. bool IsAicpuKernelConnectSpecifiedLayer();
  543. ///
  544. /// @ingroup ge
  545. /// @brief Reduce memory usage after task sink.
  546. /// @return: void
  547. ///
  548. void Shrink();
  549. ///
  550. /// @ingroup ge
  551. /// @brief Travel all nodes and do some init.
  552. /// @param [in] compute_graph: ComputeGraph to load.
  553. /// @return Status
  554. ///
  555. Status InitNodes(const ComputeGraphPtr &compute_graph);
  556. ///
  557. /// @ingroup ge
  558. /// @brief Data Op Initialize.
  559. /// @param [in] ComputeGraphPtr: root graph of the model.
  560. /// @param [in] NodePtr: Data Op.
  561. /// @param [in/out] data_op_index: index of courrent count.
  562. /// @param [in/out] data_by_index: Data ordered by index.
  563. /// @return Status
  564. ///
  565. Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
  566. map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs);
  567. ///
  568. /// @ingroup ge
  569. /// @brief Sort Data op list by index.
  570. /// @param [in] data_by_index: map of Data Op.
  571. /// @param [in] output_op_list: list of NetOutput op.
  572. /// @return Status
  573. ///
  574. Status GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list);
  575. ///
  576. /// @ingroup ge
  577. /// @brief NetOutput Op Initialize.
  578. /// @param [in] ComputeGraphPtr: root graph of the model.
  579. /// @param [in] NodePtr: NetOutput Op.
  580. /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
  581. /// @return Status
  582. ///
  583. Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list,
  584. set<const void *> &output_outside_addrs);
  585. ///
  586. /// @ingroup ge
  587. /// @brief Constant Op Init.
  588. /// @return Status
  589. ///
  590. Status InitConstant(const OpDescPtr &op_desc);
  591. Status InitVariable(const OpDescPtr &op_desc, map<string, OpDescPtr> &variable_by_name);
  592. /// @ingroup ge
  593. /// @brief LabelSet Op Initialize.
  594. /// @param [in] op_desc: LabelSet Op descriptor.
  595. /// @return Status
  596. Status InitLabelSet(const OpDescPtr &op_desc);
  597. Status InitStreamSwitch(const OpDescPtr &op_desc);
  598. Status InitStreamActive(const OpDescPtr &op_desc);
  599. Status InitStreamSwitchN(const OpDescPtr &op_desc);
  600. ///
  601. /// @ingroup ge
  602. /// @brief Case Op Init.
  603. /// @return Status
  604. ///
  605. Status InitCase(const OpDescPtr &op_desc);
  606. Status SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batch_num);
  607. ///
  608. /// @ingroup ge
  609. /// @brief TVM Op Init.
  610. /// @return Status
  611. ///
  612. Status InitTbeHandle(const OpDescPtr &op_desc);
  613. void StoreTbeHandle(const string &handle_key);
  614. void CleanTbeHandle();
  615. ///
  616. /// @ingroup ge
  617. /// @brief Make active stream list and bind to model.
  618. /// @return: 0 for success / others for fail
  619. ///
  620. Status BindModelStream();
  621. ///
  622. /// @ingroup ge
  623. /// @brief Init model stream for NN model.
  /// @param [in] stream: caller-supplied rtStream_t; when null the model presumably creates its own
  /// inner stream (see is_inner_model_stream_ member) — TODO confirm in the definition.
  624. /// @return Status
  625. ///
  626. Status InitModelStream(rtStream_t stream);
  627. ///
  628. /// @ingroup ge
  629. /// @brief ACL, Load task list with queue entrance.
  630. /// @return: 0 for success / others for fail
  631. ///
  632. Status LoadWithQueue();
  633. ///
  634. /// @ingroup ge
  635. /// @brief ACL, Bind Data Op addr to input queue.
  636. /// @return: 0 for success / others for fail
  637. ///
  638. Status BindInputQueue();
  /// @brief Build CPU zero-copy task that patches mbuf addresses into the recorded outside addrs.
  /// @param [in/out] mbuf_list: mbuf pointers to patch in.
  /// @param [in] outside_addrs: index -> ZeroCopyOffset mapping (see input_data_info_ / output_data_info_).
  639. Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);
  640. ///
  641. /// @ingroup ge
  642. /// @brief ACL, Bind NetOutput Op addr to output queue.
  643. /// @return: 0 for success / others for fail
  644. ///
  645. Status BindOutputQueue();
  /// @brief CPU task: prepare one output buffer (addr/size) before it is enqueued — see BindOutputQueue.
  646. Status CpuModelPrepareOutput(uintptr_t addr, uint32_t size);
  647. ///
  648. /// @ingroup ge
  649. /// @brief definiteness queue schedule, bind input queue to task.
  650. /// @param [in] queue_id: input queue id from user.
  /// (note: this overload takes no addr/size — the dequeued mbuf is recorded internally,
  /// see input_mbuf_list_ "input mbuf created by dequeue task".)
  653. /// @return: 0 for success / others for fail
  654. ///
  655. Status CpuModelDequeue(uint32_t queue_id);
  656. ///
  657. /// @ingroup ge
  658. /// @brief definiteness queue schedule, bind output queue to task.
  659. /// @param [in] queue_id: output queue id from user.
  660. /// @param [in] addr: NetOutput Op input tensor address.
  661. /// @param [in] size: NetOutput Op input tensor size.
  662. /// @return: 0 for success / others for fail
  663. ///
  664. Status CpuModelEnqueue(uint32_t queue_id, uintptr_t addr, uint32_t size);
  665. ///
  666. /// @ingroup ge
  667. /// @brief definiteness queue schedule, active original model stream.
  668. /// @return: 0 for success / others for fail
  669. ///
  670. Status CpuActiveStream();
  671. ///
  672. /// @ingroup ge
  673. /// @brief definiteness queue schedule, wait for end graph.
  674. /// @return: 0 for success / others for fail
  675. ///
  676. Status CpuWaitEndGraph();
  /// @brief Bind output mbufs to enqueue tasks — presumably pairs with the (queue_id, out_mbuf) overload below; verify.
  677. Status BindEnqueue();
  /// @brief Overload: enqueue a prepared output mbuf to the given queue.
  678. Status CpuModelEnqueue(uint32_t queue_id, uintptr_t out_mbuf);
  679. ///
  680. /// @ingroup ge
  681. /// @brief definiteness queue schedule, repeat run model.
  682. /// @return: 0 for success / others for fail
  683. ///
  684. Status CpuModelRepeat();
  /// @brief Create entry task(s) on the entry stream — TODO confirm semantics in the definition.
  685. Status InitEntryTask();
  /// @brief Add a dedicated head stream ahead of the model streams (see rt_head_stream_) — TODO confirm.
  686. Status AddHeadStream();
  687. ///
  688. /// @ingroup ge
  689. /// @brief set ts device.
  690. /// @return: 0 for success / others for fail
  691. ///
  692. Status SetTSDevice();
  // Op-debug dump registration / unregistration (state kept in opdebug_register_ / is_op_debug_reg_).
  693. Status OpDebugRegister();
  694. void OpDebugUnRegister();
  695. void CheckHasHcomOp(const ComputeGraphPtr &graph);
  696. Status DoTaskSink();
  697. void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);
  698. Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);
  699. void SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name);
  700. Status InitL1DataDumperArgs();
  // Profiling helpers: build per-model profile info and sink model/time profile data.
  701. Status InitModelProfile();
  702. Status SinkModelProfile();
  703. Status SinkTimeProfile(const InputData &current_data);
  704. Status InitOutputTensorInfo(const OpDescPtr &op_desc);
  705. Status GenOutputTensorInfo(OutputData *output_data, vector<ge::Tensor> &outputs);
  706. Status InitInputDescInfo(const OpDescPtr &op_desc);
  707. Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name);
  // Per-input-index AIPP metadata initializers (results cached in orig_input_info_ / aipp_* members).
  708. Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc);
  709. Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc);
  710. Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map<uint32_t, OpDescPtr> &data_list);
  711. Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc);
  712. void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info);
  713. void SetLabelForDynamic(const NodePtr &node);
  714. void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info);
  715. bool IsGetNextSinkDynamic(const OpDescPtr &op_desc);
  // Dynamic-dims ("gear") bookkeeping for online dynamic inference
  // (feeds all_gears_info_ / merge_nodes_gear_and_real_out_*_info_ members).
  716. Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node);
  717. void GetAllGearsInfo(const NodePtr &node);
  718. Status GetGetDynamicDimsNodeInfo(const NodePtr &node);
  719. Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node);
  720. Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node);
  721. Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node);
  722. bool is_weight_mem_has_inited_;
  723. bool is_feature_map_mem_has_inited_;
  724. uint32_t model_id_;
  725. uint32_t runtime_model_id_;
  726. uint32_t sub_model_id_ = 0;
  727. string name_;
  728. // used for inference data dump
  729. string om_name_;
  730. string dump_model_name_;
  731. uint32_t version_;
  732. GeModelPtr ge_model_; // release after DavinciModel::Init
  733. bool need_destroy_aicpu_kernel_{false};
  734. map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init
  735. map<string, GeTensorDesc> broadcast_variable_;
  736. void *global_step_addr_{nullptr};
  737. uint64_t global_step_size_{0};
  738. map<uint32_t, ZeroCopyOffset> input_data_info_;
  739. map<uint32_t, ZeroCopyOffset> output_data_info_;
  740. set<const void *> real_virtual_addrs_;
  741. // output op: save cce op actual needed memory size
  742. vector<int64_t> output_memory_size_list_;
  // NOTE(review): despite the name, this is a std::thread object, not a numeric thread id.
  743. thread thread_id_;
  744. shared_ptr<ModelListener> listener_;
  // presumably the "keep running" flag for the model run loop — TODO confirm in the definition.
  745. bool run_flg_;
  746. // check whether model is running with data
  747. bool running_flg_ = false;
  748. mutex mux_run_flg_;
  749. int32_t priority_;
  750. vector<rtStream_t> stream_list_;
  751. mutex all_hccl_stream_list_mutex_;
  752. vector<rtStream_t> all_hccl_stream_list_;
  753. // for reuse hccl_follow_stream
  754. mutex capacity_of_stream_mutex_;
  755. map<int64_t, vector<rtStream_t>> main_follow_stream_mapping_;
  756. vector<rtEvent_t> event_list_;
  757. vector<rtLabel_t> label_list_;
  758. set<uint32_t> label_id_indication_;
  759. mutex label_args_mutex_;
  760. map<uint32_t, pair<void *, uint32_t>> label_goto_args_;
  761. mutex outside_addrs_mutex_;
  762. vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
  763. set<const void *> copy_only_addrs_; // Address need copy to original place.
  764. vector<TaskInfoPtr> task_list_;
  765. // runtime model handle (typo fixed: was "rt_moodel_handle")
  766. rtModel_t rt_model_handle_;
  767. rtStream_t rt_model_stream_;
  768. bool is_inner_model_stream_;
  769. bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_.
  770. ExecuteMode last_execute_mode_;
  771. bool is_stream_list_bind_{false};
  772. bool is_pure_head_stream_{false};
  773. rtStream_t rt_head_stream_{nullptr};
  774. rtStream_t rt_entry_stream_{nullptr};
  775. rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED};
  776. // ACL queue schedule, save queue ids for Init.
  777. vector<TaskInfoPtr> cpu_task_list_;
  778. vector<uint32_t> input_queue_ids_; // input queue ids created by caller.
  779. vector<uint32_t> output_queue_ids_; // output queue ids created by caller.
  780. vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task.
  781. vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task.
  782. uint64_t session_id_;
  783. struct error_message::Context error_context_;
  784. uint32_t device_id_;
  785. mutex flowctrl_op_index_internal_map_mutex_;
  786. map<uint32_t, uint32_t> flowctrl_op_index_internal_map_;
  787. vector<rtStream_t> active_stream_list_;
  788. set<uint32_t> active_stream_indication_;
  789. set<uint32_t> hcom_streams_;
  790. RuntimeParam runtime_param_;
  791. static mutex tvm_bin_mutex_;
  792. set<string> tvm_bin_kernel_;
  793. map<string, uint32_t> used_tbe_handle_map_;
  794. // for profiling task and graph info
  795. vector<TaskDescInfo> task_desc_info_;
  796. std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;
  797. int64_t maxDumpOpNum_;
  798. // for data dump
  799. DataDumper data_dumper_;
  800. ExceptionDumper exception_dumper_;
  801. OpdebugRegister opdebug_register_;
  802. uint64_t iterator_count_;
  803. bool is_l1_fusion_enable_;
  804. map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init
  805. void *l1_fusion_addr_ = nullptr;
  // Pre-computed args/address tables — presumably for known-shape (static) node execution; TODO confirm.
  806. bool known_node_ = false;
  807. uint32_t total_args_size_ = 0;
  808. void *args_ = nullptr;
  809. void *args_host_ = nullptr;
  810. void *fixed_addrs_ = nullptr;
  811. void *hybrid_addrs_ = nullptr;
  812. uint32_t total_hybrid_args_size_ = 0;
  813. int64_t total_fixed_addr_size_ = 0;
  814. map<const void *, void *> known_input_data_info_;
  815. map<const void *, void *> known_output_data_info_;
  816. vector<void *> total_io_addrs_;
  // Dynamic-batch / dynamic-dims configuration.
  817. vector<vector<int64_t>> batch_info_;
  818. vector<vector<int64_t>> combined_batch_info_;
  819. vector<string> user_designate_shape_order_;
  820. int32_t dynamic_type_ = 0;
  821. bool is_dynamic_ = false;
  822. vector<uint64_t> batch_size_;
  823. // key: input tensor name, generally rts op;
  824. // value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op
  825. map<string, int64_t> tensor_name_to_fixed_addr_size_;
  826. // key: input tensor name, generally rts op; value: the peer output anchor of the peer op
  827. map<string, int64_t> tensor_name_to_peer_output_index_;
  828. // if model is first execute
  829. bool is_first_execute_;
  830. // for op debug
  831. mutex debug_reg_mutex_;
  832. bool is_op_debug_reg_ = false;
  833. bool is_online_infer_dynamic_ = false;
  834. bool is_getnext_sink_dynamic_ = false;
  835. vector<int32_t> cur_dynamic_dims_;
  836. void *netoutput_last_input_addr_ = nullptr;
  837. int64_t netoutput_last_input_size_ = 0;
  838. size_t shape_of_cur_dynamic_dims_ = 0;
  839. // key: input_index: input is merge node; value: each gear info and each output size
  840. map<size_t, map<vector<int32_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_;
  841. // key: input_index: input is merge node; value: each gear info and each output shape
  842. map<size_t, map<vector<int32_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
  843. vector<vector<int32_t>> all_gears_info_;
  844. multimap<uint32_t, uint32_t> op_id_map_;
  845. vector<ProfileInfo> profile_list_;
  846. // For super kernel.
  847. SuperKernelTaskInfo skt_info_;
  848. bool has_output_node_ = false;
  849. bool is_dynamic_aipp_ = false;
  850. vector<string> dynamic_output_shape_info_;
  851. vector<vector<void *>> input_addrs_list_;
  852. vector<vector<void *>> output_addrs_list_;
  853. vector<int64_t> output_buffer_size_;
  854. vector<GeShape> output_shape_info_;
  // AIPP / input-output description caches, keyed by input index where applicable.
  855. map<uint32_t, OriginInputInfo> orig_input_info_;
  856. map<uint32_t, AippConfigInfo> aipp_info_list_;
  857. map<uint32_t, pair<InputAippType, size_t>> aipp_type_list_;
  858. map<uint32_t, pair<vector<InputOutputDims>, vector<InputOutputDims>>> aipp_dims_info_;
  859. vector<InputOutputDescInfo> input_descs_;
  860. vector<InputOutputDescInfo> input_descs_dims_;
  861. vector<uint32_t> input_formats_;
  862. vector<InputOutputDescInfo> output_descs_;
  863. vector<uint32_t> output_formats_;
  864. // known shape node for dump
  865. void *known_shape_global_step_;
  866. // op name to attrs mapping
  867. std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_;
  868. };
  869. } // namespace ge
  870. #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示