You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

rt_stars_define.h 2.3 kB

4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of stars
  4. */
  5. #ifndef __CCE_RUNTIME_STARS_DEFINE__H
  6. #define __CCE_RUNTIME_STARS_DEFINE__H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  /*
   * Header of a STARS SQE.
   *
   * The declared widths add up to 8 bytes: two groups of 8 bits held in
   * uint8_t bit-fields, followed by three uint16_t members. rtFftsPlusSqe_t
   * embeds this header as its first member (bytes 0-7).
   *
   * NOTE(review): the position of each bit-field inside its byte is chosen by
   * the compiler; confirm it matches what the consumer of this layout expects
   * on every supported toolchain.
   */
  typedef struct tagStarsSqeHeader {
      // first 8 bits: 6 + 1 + 1
      uint8_t type : 6;
      uint8_t l1Lock : 1;
      uint8_t l1Unlock : 1;

      // next 8 bits: 2 + 2 + 2 + 1 + 1
      uint8_t ie : 2;
      uint8_t preP : 2;
      uint8_t postP : 2;
      uint8_t wrCqe : 1;
      uint8_t reserved : 1;

      // remaining 6 bytes
      uint16_t blockDim;
      uint16_t rtStreamId;
      uint16_t taskId;
  } rtStarsSqeHeader_t;
  /*
   * FFTS+ type codes.
   *
   * All values fit in 3 bits.
   * NOTE(review): presumably these are the values written to
   * rtFftsPlusSqe_t::fftsType (a 3-bit field) - confirm against the caller.
   */
  typedef enum tagFftsPlusType {
      RT_FFTS_PLUS_TYPE_RES1 = 2,  // Reserved
      RT_FFTS_PLUS_TYPE_RES2 = 3,  // Reserved
      RT_FFTS_PLUS_TYPE = 4,       // FFTS+ mode
  } rtFftsPlusType_t;
  32. // ffts+ sqe
  33. typedef struct tagFftsPlusSqe {
  34. // 0-7 bytes
  35. rtStarsSqeHeader_t sqeHeader;
  36. // 8-11 bytes
  37. uint16_t fftsType: 3;
  38. uint16_t reserved1: 13;
  39. uint16_t reserved2;
  40. // 12-15 bytes
  41. uint16_t pmg: 2;
  42. uint16_t ns: 1;
  43. uint16_t partId: 8;
  44. uint16_t reserved3: 1;
  45. uint16_t qos: 4;
  46. uint8_t kernelCredit;
  47. uint8_t reserved4;
  48. // 16-23 bytes
  49. uint32_t stackPhyBaseL;
  50. uint32_t stackPhyBaseH;
  51. // 24-31 bytes
  52. uint16_t totalContextNum;
  53. uint16_t readyContextNum;
  54. uint16_t preloadContextNum;
  55. uint16_t reserved5;
  56. // 32-35 bytes
  57. uint16_t reserved6: 8;
  58. uint16_t reserved7: 4;
  59. uint16_t dsplitUnit: 3;
  60. uint16_t reserved8: 1;
  61. uint16_t prefetchOstNum: 5;
  62. uint16_t reserved9: 3;
  63. uint16_t cmaintOstNum: 5;
  64. uint16_t reserved10: 3;
  65. // 36-39 bytes
  66. uint16_t aicPrefetchLower: 5;
  67. uint16_t reserved11: 3;
  68. uint16_t aicPrefetchUpper: 5;
  69. uint16_t Reserved12: 3;
  70. uint16_t aivPrefetchLower: 5;
  71. uint16_t Reserved13: 3;
  72. uint16_t aivPrefetchUpper: 5;
  73. uint16_t Reserved14: 3;
  74. // 40-47 bytes
  75. uint32_t contextAddressBaseL;
  76. uint32_t contextAddressBaseH:17;
  77. uint32_t reserved15:15;
  78. // 48-63 bytes
  79. uint32_t reserved16[4];
  80. } rtFftsPlusSqe_t;
  81. #pragma pack(pop)
  82. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  83. }
  84. #endif
  85. #endif // __CCE_RUNTIME_STARS_DEFINE__H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。