You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

prof_common.h 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  /*
   * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
   * Description: handle perf data
   * Author: Huawei Technologies Co., Ltd.
   * Create: 2019-10-13
   */
  7. #ifndef MSPROFILER_PROF_COMMON_H_
  8. #define MSPROFILER_PROF_COMMON_H_
  9. #ifdef __cplusplus
  10. extern "C" {
  11. #endif // __cplusplus
  12. #include <stdint.h>
  // Value stored in the leading 16-bit magicNumber field of the profiling
  // records declared in this header.
  #define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a
  /**
   * @brief values for the dataTag field of a profiling record
   *
   * Each reporting component owns a block of 20 consecutive values; the range
   * is noted on the first tag of each block.
   */
  enum MsprofDataTag {
      MSPROF_ACL_DATA_TAG = 0,            // acl data tag, range: 0~19
      MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, // ge data tag, range: 20~39
      MSPROF_GE_DATA_TAG_FUSION = 21,
      MSPROF_GE_DATA_TAG_INFER = 22,
      MSPROF_GE_DATA_TAG_TASK = 23,
      MSPROF_GE_DATA_TAG_TENSOR = 24,
      MSPROF_GE_DATA_TAG_STEP = 25,
      MSPROF_GE_DATA_TAG_ID_MAP = 26,
      MSPROF_GE_DATA_TAG_HOST_SCH = 27,
      MSPROF_RUNTIME_DATA_TAG_API = 40,   // runtime data tag, range: 40~59
      MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
      MSPROF_AICPU_DATA_TAG = 60,         // aicpu data tag, range: 60~79
      MSPROF_HCCL_DATA_TAG = 80,          // hccl data tag, range: 80~99
      MSPROF_DP_DATA_TAG = 100,           // dp data tag, range: 100~119
      MSPROF_MSPROFTX_DATA_TAG = 120,     // msproftx data tag, range: 120~139
      // Tags are stored in uint16_t fields, so this is one past the largest
      // storable value (65535) and cannot itself be written to a record.
      MSPROF_DATA_TAG_MAX = 65536,
  };
  /**
   * @brief struct of mixed data
   *
   * A one-byte discriminator followed by a union that holds either a 64-bit
   * hash id or an inline character buffer.
   */
  #define MSPROF_MIX_DATA_RESERVE_BYTES 7
  #define MSPROF_MIX_DATA_STRING_LEN 120

  // Discriminator values for MsprofMixData::type.
  enum MsprofMixDataType {
      MSPROF_MIX_DATA_HASH_ID = 0,
      MSPROF_MIX_DATA_STRING,
  };

  struct MsprofMixData {
      uint8_t type; // MsprofMixDataType
      uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; // type + rsv occupy 8 bytes ahead of the union
      union {
          uint64_t hashId;
          char dataStr[MSPROF_MIX_DATA_STRING_LEN];
      } data;
  };

  // Short alias used by the record structs. Written as a typedef, which means
  // the same as the alias-declaration in C++ and is also accepted by C.
  typedef struct MsprofMixData MixData;
  /**
   * @brief profiling command info
   */
  #define MSPROF_MAX_DEV_NUM 64

  struct MsprofCommandHandle {
      uint64_t profSwitch;
      uint64_t profSwitchHi;
      uint32_t devNums; // presumably the number of valid devIdList entries -- confirm with callers
      uint32_t devIdList[MSPROF_MAX_DEV_NUM]; // fixed capacity of MSPROF_MAX_DEV_NUM ids
      uint32_t modelId;
      uint32_t type;
  };
  /**
   * @brief constants for the data reported by acl
   */
  #define MSPROF_ACL_DATA_RESERVE_BYTES 32
  #define MSPROF_ACL_API_NAME_LEN 64

  // API categories for acl records; numbering starts at 1, not 0.
  enum MsprofAclApiType {
      MSPROF_ACL_API_TYPE_OP = 1,
      MSPROF_ACL_API_TYPE_MODEL,
      MSPROF_ACL_API_TYPE_RUNTIME,
      MSPROF_ACL_API_TYPE_OTHERS,
  };
  73. struct MsprofAclProfData {
  74. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  75. uint16_t dataTag = MSPROF_ACL_DATA_TAG;
  76. uint32_t apiType; // enum MsprofAclApiType
  77. uint64_t beginTime;
  78. uint64_t endTime;
  79. uint32_t processId;
  80. uint32_t threadId;
  81. char apiName[MSPROF_ACL_API_NAME_LEN];
  82. uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES];
  83. };
  84. /**
  85. * @brief struct of data reported by GE
  86. */
  87. #define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104
  88. struct MsprofGeProfModelLoadData {
  89. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  90. uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD;
  91. uint32_t modelId;
  92. MixData modelName;
  93. uint64_t startTime;
  94. uint64_t endTime;
  95. uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES];
  96. };
  97. #define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8
  98. #define MSPROF_GE_FUSION_OP_NUM 8
  99. struct MsprofGeProfFusionData {
  100. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  101. uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION;
  102. uint32_t modelId;
  103. MixData fusionName;
  104. uint64_t inputMemSize;
  105. uint64_t outputMemSize;
  106. uint64_t weightMemSize;
  107. uint64_t workspaceMemSize;
  108. uint64_t totalMemSize;
  109. uint64_t fusionOpNum;
  110. uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM];
  111. uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES];
  112. };
  113. #define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64
  114. struct MsprofGeProfInferData {
  115. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  116. uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER;
  117. uint32_t modelId;
  118. MixData modelName;
  119. uint32_t requestId;
  120. uint32_t threadId;
  121. uint64_t inputDataStartTime;
  122. uint64_t inputDataEndTime;
  123. uint64_t inferStartTime;
  124. uint64_t inferEndTime;
  125. uint64_t outputDataStartTime;
  126. uint64_t outputDataEndTime;
  127. uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES];
  128. };
  #define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16
  #define MSPROF_GE_OP_TYPE_LEN 56

  // Values for MsprofGeProfTaskData::taskType.
  enum MsprofGeTaskType {
      MSPROF_GE_TASK_TYPE_AI_CORE = 0,
      MSPROF_GE_TASK_TYPE_AI_CPU,
      MSPROF_GE_TASK_TYPE_AIV,
  };

  // Values for MsprofGeProfTaskData::shapeType.
  enum MsprofGeShapeType {
      MSPROF_GE_SHAPE_TYPE_STATIC = 0,
      MSPROF_GE_SHAPE_TYPE_DYNAMIC,
  };
  140. struct MsprofGeOpType {
  141. uint8_t type; // MsprofMixDataType
  142. uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
  143. union {
  144. uint64_t hashId;
  145. char dataStr[MSPROF_GE_OP_TYPE_LEN];
  146. } data;
  147. };
  148. struct MsprofGeProfTaskData {
  149. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  150. uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK;
  151. uint32_t taskType; // MsprofGeTaskType
  152. MixData opName;
  153. MsprofGeOpType opType;
  154. uint64_t curIterNum;
  155. uint64_t timeStamp;
  156. uint32_t shapeType; // MsprofGeShapeType
  157. uint32_t blockDims;
  158. uint32_t modelId;
  159. uint32_t streamId;
  160. uint32_t taskId;
  161. uint32_t threadId;
  162. uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES];
  163. };
  #define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8
  #define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8
  #define MSPROF_GE_TENSOR_DATA_NUM 5

  // Values for MsprofGeTensorData::tensorType.
  enum MsprofGeTensorType {
      MSPROF_GE_TENSOR_TYPE_INPUT = 0,
      MSPROF_GE_TENSOR_TYPE_OUTPUT,
  };

  // Description of one tensor: three 32-bit attributes followed by a
  // fixed-capacity shape array (44 bytes in total).
  struct MsprofGeTensorData {
      uint32_t tensorType; // MsprofGeTensorType
      uint32_t format;
      uint32_t dataType;
      uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN];
  };
  177. struct MsprofGeProfTensorData {
  178. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  179. uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR;
  180. uint32_t modelId;
  181. uint64_t curIterNum;
  182. uint32_t streamId;
  183. uint32_t taskId;
  184. uint32_t tensorNum;
  185. MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM];
  186. uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES];
  187. };
  #define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27

  // Values for MsprofGeProfStepData::tag.
  enum MsprofGeStepTag {
      MSPROF_GE_STEP_TAG_BEGIN = 0,
      MSPROF_GE_STEP_TAG_END,
  };
  193. struct MsprofGeProfStepData {
  194. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  195. uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP;
  196. uint32_t modelId;
  197. uint32_t streamId;
  198. uint32_t taskId;
  199. uint64_t timeStamp;
  200. uint64_t curIterNum;
  201. uint32_t threadId;
  202. uint8_t tag; // MsprofGeStepTag
  203. uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES];
  204. };
  205. #define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6
  206. struct MsprofGeProfIdMapData {
  207. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  208. uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP;
  209. uint32_t graphId;
  210. uint32_t modelId;
  211. uint32_t sessionId;
  212. uint64_t timeStamp;
  213. uint16_t mode;
  214. uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES];
  215. };
  216. #define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24
  217. struct MsprofGeProfHostSchData {
  218. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  219. uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH;
  220. uint32_t threadId; // record in start event
  221. uint64_t element;
  222. uint64_t event;
  223. uint64_t startTime; // record in start event
  224. uint64_t endTime; // record in end event
  225. uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES];
  226. };
  227. /**
  228. * @brief struct of data reported by RunTime
  229. */
  230. #define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106
  231. #define MSPROF_RUNTIME_TASK_ID_NUM 10
  232. #define MSPROF_RUNTIME_API_NAME_LEN 64
  233. struct MsprofRuntimeProfApiData {
  234. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  235. uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API;
  236. uint32_t threadId;
  237. uint64_t entryTime;
  238. uint64_t exitTime;
  239. uint64_t dataSize;
  240. uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN];
  241. uint32_t retCode;
  242. uint32_t streamId;
  243. uint32_t taskNum;
  244. uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM];
  245. uint16_t memcpyDirection;
  246. uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES];
  247. };
  248. #define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10
  249. #define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32
  250. struct MsprofRuntimeProfTrackData {
  251. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  252. uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK;
  253. uint32_t threadId;
  254. uint64_t timeStamp;
  255. char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN];
  256. uint32_t taskId;
  257. uint16_t streamId;
  258. uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES];
  259. };
  260. /**
  261. * @brief struct of data reported by RunTime
  262. */
  263. #define MSPROF_AICPU_DATA_RESERVE_BYTES 9
  264. struct MsprofAicpuProfData {
  265. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  266. uint16_t dataTag = MSPROF_AICPU_DATA_TAG;
  267. uint16_t streamId;
  268. uint16_t taskId;
  269. uint64_t runStartTime;
  270. uint64_t runStartTick;
  271. uint64_t computeStartTime;
  272. uint64_t memcpyStartTime;
  273. uint64_t memcpyEndTime;
  274. uint64_t runEndTime;
  275. uint64_t runEndTick;
  276. uint32_t threadId;
  277. uint32_t deviceId;
  278. uint64_t submitTick;
  279. uint64_t scheduleTick;
  280. uint64_t tickBeforeRun;
  281. uint64_t tickAfterRun;
  282. uint32_t kernelType;
  283. uint32_t dispatchTime;
  284. uint32_t totalTime;
  285. uint16_t fftsThreadId;
  286. uint8_t version;
  287. uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES];
  288. };
  289. /**
  290. * @brief struct of data reported by DP
  291. */
  292. #define MSPROF_DP_DATA_RESERVE_BYTES 16
  293. #define MSPROF_DP_DATA_ACTION_LEN 16
  294. #define MSPROF_DP_DATA_SOURCE_LEN 64
  295. struct MsprofDpProfData {
  296. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  297. uint16_t dataTag = MSPROF_DP_DATA_TAG;
  298. uint32_t rsv; // Ensure 8-byte alignment
  299. uint64_t timeStamp;
  300. char action[MSPROF_DP_DATA_ACTION_LEN];
  301. char source[MSPROF_DP_DATA_SOURCE_LEN];
  302. uint64_t index;
  303. uint64_t size;
  304. uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES];
  305. };
  306. /**
  307. * @brief struct of data reported by HCCL
  308. */
  309. #pragma pack(4)
  310. struct MsprofHcclProfNotify {
  311. uint32_t taskID;
  312. uint64_t notifyID;
  313. uint32_t stage;
  314. uint32_t remoteRank;
  315. uint32_t transportType;
  316. uint32_t role; // role {0: dst, 1:src}
  317. double durationEstimated;
  318. };
  319. struct MsprofHcclProfReduce {
  320. uint32_t taskID;
  321. uint64_t src;
  322. uint64_t dst;
  323. uint64_t size;
  324. uint32_t op; // {0: sum, 1: mul, 2: max, 3: min}
  325. uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
  326. uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
  327. uint32_t remoteRank;
  328. uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL}
  329. uint32_t role; // role {0: dst, 1:src}
  330. double durationEstimated;
  331. };
  332. struct MsprofHcclProfRDMA {
  333. uint32_t taskID;
  334. uint64_t src;
  335. uint64_t dst;
  336. uint64_t size;
  337. uint64_t notifyID;
  338. uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
  339. uint32_t remoteRank;
  340. uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
  341. uint32_t role; // role {0: dst, 1:src}
  342. uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload}
  343. double durationEstimated;
  344. };
  345. struct MsprofHcclProfMemcpy {
  346. uint32_t taskID;
  347. uint64_t src;
  348. uint64_t dst;
  349. uint64_t size;
  350. uint64_t notifyID;
  351. uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
  352. uint32_t remoteRank;
  353. uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
  354. uint32_t role; // role {0: dst, 1:src}
  355. double durationEstimated;
  356. };
  357. struct MsprofHcclProfStageStep {
  358. uint32_t rank;
  359. uint32_t rankSize;
  360. };
  361. struct MsprofHcclProfFlag {
  362. uint64_t cclTag;
  363. uint64_t groupName;
  364. uint32_t localRank;
  365. uint32_t workFlowMode;
  366. };
  367. /**
  368. * @name MsprofHcclProfData
  369. * @brief struct of data reported by hccl
  370. */
  371. struct MsprofHcclProfData {
  372. uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
  373. uint16_t dataTag = MSPROF_HCCL_DATA_TAG;
  374. uint32_t planeID;
  375. uint32_t deviceID;
  376. uint32_t streamID;
  377. double ts;
  378. char name[16];
  379. union {
  380. MsprofHcclProfNotify notify;
  381. MsprofHcclProfReduce reduce;
  382. MsprofHcclProfStageStep stageStep;
  383. MsprofHcclProfMemcpy forMemcpy;
  384. MsprofHcclProfRDMA RDMA;
  385. MsprofHcclProfFlag flag;
  386. } args;
  387. };
  388. #pragma pack()
  /**
   * @name MsprofStampInfo
   * @brief struct of data reported by msproftx
   *
   * Unlike the other records in this header, magicNumber and dataTag carry no
   * default initializers, so the producer has to fill them in. With natural
   * alignment the members total 256 bytes.
   */
  struct MsprofStampInfo {
      uint16_t magicNumber;
      uint16_t dataTag;
      uint32_t processId;
      uint32_t threadId;
      uint32_t category; // marker category
      uint32_t eventType;
      int32_t payloadType;
      // payload info for marker: one 8-byte slot viewed as a single 64-bit
      // value or a pair of 32-bit values
      union PayloadValue {
          uint64_t ullValue;
          int64_t llValue;
          double dValue;
          uint32_t uiValue[2];
          int32_t iValue[2];
          float fValue[2];
      } payload;
      uint64_t startTime;
      uint64_t endTime;
      int32_t messageType;
      char message[128];
      uint8_t reserve0[4];
      uint8_t reserve1[72];
  };
  417. #ifdef __cplusplus
  418. }
  419. #endif
  420. #endif // MSPROFILER_PROF_COMMON_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示