You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

prof_acl_api.h 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MSPROFILER_API_PROF_ACL_API_H_
  17. #define MSPROFILER_API_PROF_ACL_API_H_
  18. #define MSVP_MAX_DEV_NUM 64 // capacity of ProfConfig::devIdList
  19. #define MSVP_PROF_API __attribute__((visibility("default"))) // gives the declaration default symbol visibility
  20. // DataTypeConfig: one bit per profiling data type (PROF_TASK_TRACE is a multi-bit value)
  21. #define PROF_ACL_API 0x0001
  22. #define PROF_TASK_TIME 0x0002
  23. #define PROF_AICORE_METRICS 0x0004
  24. #define PROF_AICPU_TRACE 0x0008
  25. #define PROF_MODEL_EXECUTE 0x0010
  26. #define PROF_RUNTIME_API 0x0020
  27. #define PROF_RUNTIME_TRACE 0x0040
  28. #define PROF_SCHEDULE_TIMELINE 0x0080
  29. #define PROF_SCHEDULE_TRACE 0x0100
  30. #define PROF_AIVECTORCORE_METRICS 0x0200
  31. #define PROF_SUBTASK_TIME 0x0400
  32. #define PROF_TRAINING_TRACE 0x0800
  33. #define PROF_HCCL_TRACE 0x1000
  34. #define PROF_DATA_PROCESS 0x2000
  35. #define PROF_TASK_TRACE 0x3842 // == PROF_TASK_TIME | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | PROF_HCCL_TRACE | PROF_DATA_PROCESS
  36. #define PROF_MODEL_LOAD 0x8000000000000000 // bit 63, the top bit of a 64-bit value
  37. // DataTypeConfig MASK: each mask has the same value as the like-named DataTypeConfig flag; no mask is defined for PROF_TASK_TRACE
  38. #define PROF_ACL_API_MASK 0x0001
  39. #define PROF_TASK_TIME_MASK 0x0002
  40. #define PROF_AICORE_METRICS_MASK 0x0004
  41. #define PROF_AICPU_TRACE_MASK 0x0008
  42. #define PROF_MODEL_EXECUTE_MASK 0x0010
  43. #define PROF_RUNTIME_API_MASK 0x0020
  44. #define PROF_RUNTIME_TRACE_MASK 0x0040
  45. #define PROF_SCHEDULE_TIMELINE_MASK 0x0080
  46. #define PROF_SCHEDULE_TRACE_MASK 0x0100
  47. #define PROF_AIVECTORCORE_METRICS_MASK 0x0200
  48. #define PROF_SUBTASK_TIME_MASK 0x0400
  49. #define PROF_TRAINING_TRACE_MASK 0x0800
  50. #define PROF_HCCL_TRACE_MASK 0x1000
  51. #define PROF_DATA_PROCESS_MASK 0x2000
  52. #define PROF_MODEL_LOAD_MASK 0x8000000000000000 // bit 63, the top bit of a 64-bit value
  53. #include <cstdint>
  54. #include <string>
  55. /**
  56. * @name ProfErrorCode
  57. * @brief error codes of the prof_acl_api functions (the functions return them as int32_t)
  58. */
  59. enum ProfErrorCode {
  60. PROF_ERROR_NONE = 0, // ok
  61. PROF_ERROR_PARAM_INVALID, // param invalid, for example nullptr
  62. PROF_ERROR_REPEAT_INIT, // profiling has already been inited
  63. PROF_ERROR_CONFIG_INVALID, // config invalid, for example invalid json string
  64. PROF_ERROR_DIR_NO_ACCESS, // dir is not accessible
  65. PROF_ERROR_FAILURE, // failed to init or start profiling
  66. PROF_ERROR_NOT_INITED, // profiling has not been inited
  67. PROF_ERROR_DEVICE_INVALID, // device id invalid
  68. PROF_ERROR_UNSUPPORTED, // unsupported data type or ai core metrics
  69. PROF_ERROR_REPEAT_START, // profiling has already been started
  70. PROF_ERROR_NOT_STARTED, // profiling has not been started
  71. PROF_ERROR_REPEAT_SUBSCRIBE, // same model id has already been subscribed
  72. PROF_ERROR_MODEL_ID_INVALID, // model id does not exist or has not been subscribed
  73. PROF_ERROR_API_CONFLICT, // prof ctrl api mode conflicts with subscribe mode
  74. };
  75. /**
  76. * @brief convert the profiling config from acl.json into a sample config
  77. * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
  78. * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
  79. * @return ProfErrorCode value, returned as int32_t
  80. */
  81. MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);
  82. /**
  83. * @name ProfInit
  84. * @brief initialize profiling
  85. * @param profInitCfg [IN] profiling init config, as a json format string
  86. * @return ProfErrorCode value, returned as int32_t
  87. */
  88. MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);
  89. /**
  90. * @name ProfAicoreMetrics
  91. * @brief aicore metrics enum, used by ProfConfig and ProfSubscribeConfig
  92. */
  93. enum ProfAicoreMetrics {
  94. PROF_AICORE_ARITHMATIC_THROUGHPUT = 0,
  95. PROF_AICORE_PIPELINE = 1,
  96. PROF_AICORE_SYNCHRONIZATION = 2,
  97. PROF_AICORE_MEMORY = 3,
  98. PROF_AICORE_INTERNAL_MEMORY = 4,
  99. PROF_AICORE_STALL = 5,
  100. PROF_AICORE_METRICS_COUNT, // implicitly 6, the number of metric kinds listed above
  101. PROF_AICORE_NONE = 0xff, // NOTE(review): presumably means "no aicore metric selected" - confirm
  102. };
  103. /**
  104. * @name ProfConfig
  105. * @brief config struct taken by ProfStartProfiling and ProfStopProfiling
  106. */
  107. struct ProfConfig {
  108. uint32_t devNums; // length of device id list
  109. uint32_t devIdList[MSVP_MAX_DEV_NUM]; // physical device id list, at most MSVP_MAX_DEV_NUM entries
  110. ProfAicoreMetrics aicoreMetrics; // aicore metric
  111. uint64_t dataTypeConfig; // data type to start profiling, as a 64-bit DataTypeConfig value
  112. };
  113. /**
  114. * @name ProfStartProfiling
  115. * @brief start profiling
  116. * @param profStartCfg [IN] pointer to the config used to start profiling
  117. * @return ProfErrorCode value, returned as int32_t
  118. */
  119. MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);
  120. /**
  121. * @name ProfStopProfiling
  122. * @brief stop profiling
  123. * @param profStopCfg [IN] pointer to the config used to stop profiling
  124. * @return ProfErrorCode value, returned as int32_t
  125. */
  126. MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);
  127. /**
  128. * @name ProfFinalize
  129. * @brief finalize the profiling task
  130. * @return ProfErrorCode value, returned as int32_t
  131. */
  132. MSVP_PROF_API int32_t ProfFinalize();
  133. /**
  134. * @name ProfGetDataTypeConfig
  135. * @brief get the dataTypeConfig that one device was started with
  136. * @param deviceId [IN] id of the device whose dataTypeConfig is queried
  137. * @param dataTypeConfig [OUT] receives the result
  138. * @return ProfErrorCode value, returned as int32_t
  139. */
  140. MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);
  141. namespace Msprofiler {
  142. namespace Api {
  143. /**
  144. * @brief convert the profiling config from acl.json into a sample config (Msprofiler::Api declaration)
  145. * @param aclCfg [IN] profiling json string from acl.json as {"switch":"on", "result_path":"/home",...}
  146. * @param sampleCfg [OUT] json string for GE as {"startCfg":[{"deviceID":"all","jobID":"1234",...}]}
  147. * @return ProfErrorCode value, returned as int32_t
  148. */
  149. MSVP_PROF_API int32_t ProfAclCfgToSampleCfg(const std::string &aclCfg, std::string &sampleCfg);
  150. /**
  151. * @name ProfInit
  152. * @brief initialize profiling (Msprofiler::Api declaration)
  153. * @param profInitCfg [IN] profiling init config, as a json format string
  154. * @return ProfErrorCode value, returned as int32_t
  155. */
  156. MSVP_PROF_API int32_t ProfInit(const std::string &profInitCfg);
  157. /**
  158. * @name ProfStartProfiling
  159. * @brief start profiling (Msprofiler::Api declaration)
  160. * @param profStartCfg [IN] pointer to the config used to start profiling
  161. * @return ProfErrorCode value, returned as int32_t
  162. */
  163. MSVP_PROF_API int32_t ProfStartProfiling(const ProfConfig *profStartCfg);
  164. /**
  165. * @name ProfStopProfiling
  166. * @brief stop profiling (Msprofiler::Api declaration)
  167. * @param profStopCfg [IN] pointer to the config used to stop profiling
  168. * @return ProfErrorCode value, returned as int32_t
  169. */
  170. MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg);
  171. /**
  172. * @name ProfFinalize
  173. * @brief finalize the profiling task (Msprofiler::Api declaration)
  174. * @return ProfErrorCode value, returned as int32_t
  175. */
  176. MSVP_PROF_API int32_t ProfFinalize();
  177. /**
  178. * @name ProfGetDataTypeConfig
  179. * @brief get the dataTypeConfig that one device was started with (Msprofiler::Api declaration)
  180. * @param deviceId [IN] id of the device whose dataTypeConfig is queried
  181. * @param dataTypeConfig [OUT] receives the result
  182. * @return ProfErrorCode value, returned as int32_t
  183. */
  184. MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig);
  185. /**
  186. * @name WorkMode
  187. * @brief work mode of the profiling api, as returned by ProfGetApiWorkMode
  188. */
  189. enum WorkMode {
  190. WORK_MODE_OFF, // profiling is not at work (implicit value 0)
  191. WORK_MODE_API_CTRL, // profiling works in api ctrl mode (ProfInit)
  192. WORK_MODE_SUBSCRIBE, // profiling works in subscribe mode
  193. };
  194. /**
  195. * @name ProfGetApiWorkMode
  196. * @brief get the current work mode of the profiling api
  197. * @return WorkMode
  198. */
  199. MSVP_PROF_API WorkMode ProfGetApiWorkMode();
  200. /**
  201. * @name ProfSubscribeConfig
  202. * @brief config of the subscribe api, taken by ProfModelSubscribe
  203. */
  204. struct ProfSubscribeConfig {
  205. bool timeInfo; // subscribe op time
  206. ProfAicoreMetrics aicoreMetrics; // subscribe ai core metrics
  207. void* fd; // pipe fd, held as an untyped pointer
  208. };
  209. /**
  210. * @name ProfGetDataTypeConfig
  211. * @brief get the DataTypeConfig of a subscribe config (overload taking ProfSubscribeConfig)
  212. * @param profSubscribeConfig [IN] config to subscribe data
  213. * @return DataTypeConfig, as a uint64_t value
  214. */
  215. MSVP_PROF_API uint64_t ProfGetDataTypeConfig(const ProfSubscribeConfig *profSubscribeConfig);
  216. /**
  217. * @name ProfModelSubscribe
  218. * @brief subscribe to the data of one model id
  219. * @param modelId [IN] model id to subscribe data
  220. * @param devId [IN] device id of the model
  221. * @param profSubscribeConfig [IN] config to subscribe data
  222. * @return ProfErrorCode value, returned as int32_t
  223. */
  224. MSVP_PROF_API int32_t ProfModelSubscribe(uint32_t modelId, uint32_t devId,
  225. const ProfSubscribeConfig *profSubscribeConfig);
  226. /**
  227. * @name ProfIsModelSubscribed
  228. * @brief check if a model id is subscribed
  229. * @param modelId [IN] model id to check
  230. * @return true: subscribed, false: not subscribed
  231. */
  232. MSVP_PROF_API bool ProfIsModelSubscribed(uint32_t modelId);
  233. /**
  234. * @name ProfModelUnSubscribe
  235. * @brief unsubscribe a model id
  236. * @param modelId [IN] model id to unsubscribe
  237. * @return ProfErrorCode value, returned as int32_t
  238. */
  239. MSVP_PROF_API int32_t ProfModelUnSubscribe(uint32_t modelId);
  240. /**
  241. * @name ProfGetOpDescSize
  242. * @brief get the size of the profiling subscribe data struct
  243. * @param opDescSize [OUT] receives the size in bytes of the profiling subscribe data struct
  244. * @return ProfErrorCode value, returned as int32_t
  245. */
  246. MSVP_PROF_API int32_t ProfGetOpDescSize(uint32_t *opDescSize);
  247. /**
  248. * @name ProfGetOpNum
  249. * @brief get the number of ops contained in data
  250. * @param data [IN] data read from pipe
  251. * @param len [IN] data length
  252. * @param opNum [OUT] receives the number of ops in data
  253. * @return ProfErrorCode value, returned as int32_t
  254. */
  255. MSVP_PROF_API int32_t ProfGetOpNum(const void *data, uint32_t len, uint32_t *opNum);
  256. /**
  257. * @name ProfGetModelId
  258. * @brief get the model id of a specific part (op) of data
  259. * @param data [IN] data read from pipe
  260. * @param len [IN] data length
  261. * @param index [IN] index of the part (op)
  262. * @return model id
  263. */
  264. MSVP_PROF_API uint32_t ProfGetModelId(const void *data, uint32_t len, uint32_t index);
  265. /**
  266. * @name ProfGetOpType
  267. * @brief get the op type of a specific part (op) of data
  268. * @param data [IN] data read from pipe
  269. * @param len [IN] data length
  270. * @param opType [OUT] buffer that receives the op type
  271. * @param opTypeLen [IN] size of the opType buffer
  272. * @param index [IN] index of the part (op)
  273. * @return ProfErrorCode value, returned as int32_t
  274. */
  275. MSVP_PROF_API int32_t ProfGetOpType(const void *data, uint32_t len, char *opType, uint32_t opTypeLen, uint32_t index);
  276. /**
  277. * @name ProfGetOpName
  278. * @brief get the op name of a specific part (op) of data
  279. * @param data [IN] data read from pipe
  280. * @param len [IN] data length
  281. * @param opName [OUT] buffer that receives the op name
  282. * @param opNameLen [IN] size of the opName buffer
  283. * @param index [IN] index of the part (op)
  284. * @return ProfErrorCode value, returned as int32_t
  285. */
  286. MSVP_PROF_API int32_t ProfGetOpName(const void *data, uint32_t len, char *opName, uint32_t opNameLen, uint32_t index);
  287. /**
  288. * @name ProfGetOpStart
  289. * @brief get the op start timestamp of a specific part (op) of data
  290. * @param data [IN] data read from pipe
  291. * @param len [IN] data length
  292. * @param index [IN] index of the part (op)
  293. * @return op start timestamp (us)
  294. */
  295. MSVP_PROF_API uint64_t ProfGetOpStart(const void *data, uint32_t len, uint32_t index);
  296. /**
  297. * @name ProfGetOpEnd
  298. * @brief get the op end timestamp of a specific part (op) of data
  299. * @param data [IN] data read from pipe
  300. * @param len [IN] data length
  301. * @param index [IN] index of the part (op)
  302. * @return op end timestamp (us)
  303. */
  304. MSVP_PROF_API uint64_t ProfGetOpEnd(const void *data, uint32_t len, uint32_t index);
  305. /**
  306. * @name ProfGetOpDuration
  307. * @brief get the op duration of a specific part (op) of data
  308. * @param data [IN] data read from pipe
  309. * @param len [IN] data length
  310. * @param index [IN] index of the part (op)
  311. * @return op duration (us)
  312. */
  313. MSVP_PROF_API uint64_t ProfGetOpDuration(const void *data, uint32_t len, uint32_t index);
  314. /**
  315. * @name ProfGetOpExecutionTime
  316. * @brief get the op execution time of a specific part (op) of data
  317. * @param data [IN] data read from pipe
  318. * @param len [IN] data length
  319. * @param index [IN] index of the part (op)
  320. * @return op execution time (us)
  321. */
  322. MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index);
  323. /**
  324. * @name ProfGetOpCubeOps
  325. * @brief get the op cube fops of a specific part (op) of data
  326. * @param data [IN] data read from pipe
  327. * @param len [IN] data length
  328. * @param index [IN] index of the part (op)
  329. * @return op cube fops (NOTE(review): "fops" presumably means floating-point operations - confirm)
  330. */
  331. MSVP_PROF_API uint64_t ProfGetOpCubeOps(const void *data, uint32_t len, uint32_t index);
  332. /**
  333. * @name ProfGetOpVectorOps
  334. * @brief get the op vector fops of a specific part (op) of data
  335. * @param data [IN] data read from pipe
  336. * @param len [IN] data length
  337. * @param index [IN] index of the part (op)
  338. * @return op vector fops (NOTE(review): "fops" presumably means floating-point operations - confirm)
  339. */
  340. MSVP_PROF_API uint64_t ProfGetOpVectorOps(const void *data, uint32_t len, uint32_t index);
  341. } // namespace Api
  342. } // namespace Msprofiler
  343. #endif // MSPROFILER_API_PROF_ACL_API_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示