You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

rt_stars_define.h 2.1 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of stars
  4. */
  5. #ifndef CCE_RUNTIME_RT_STARS_DEFINE_H
  6. #define CCE_RUNTIME_RT_STARS_DEFINE_H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  /*
   * Header shared by STARS SQEs; rtFftsPlusSqe_t embeds it as its first member.
   *
   * Layout: two bytes of bit-fields (6+1+1 bits, then 2+2+2+1+1 bits) followed by
   * three 16-bit fields. This file declares it under #pragma pack(1).
   * The meaning of each field is not defined in this file.
   * NOTE(review): bit-field order within a byte is compiler-defined - confirm
   * against the target toolchain before relying on bit positions.
   */
  typedef struct tagStarsSqeHeader {
      /* first byte of bit-fields */
      uint8_t type : 6;
      uint8_t l1Lock : 1;
      uint8_t l1Unlock : 1;

      /* second byte of bit-fields */
      uint8_t ie : 2;
      uint8_t preP : 2;
      uint8_t postP : 2;
      uint8_t wrCqe : 1;
      uint8_t reserved : 1;

      /* three 16-bit fields */
      uint16_t blockDim;
      uint16_t rtStreamId;
      uint16_t taskId;
  } rtStarsSqeHeader_t;
  // FFTS+ type codes. Only the values 2, 3 and 4 are named in this enum.
  typedef enum tagFftsPlusType {
      RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved
      RT_FFTS_PLUS_TYPE_RES2 = 3, // Reserved
      RT_FFTS_PLUS_TYPE = 4,      // FFTS+ mode
  } rtFftsPlusType_t;
  32. // ffts+ sqe
  33. typedef struct tagFftsPlusSqe {
  34. // 0-7 bytes
  35. rtStarsSqeHeader_t sqeHeader;
  36. // 8-11 bytes
  37. uint16_t fftsType : 3;
  38. uint16_t reserved1 : 9;
  39. uint16_t wrrRatio : 4;
  40. uint16_t reserved2;
  41. // 12-15 bytes
  42. uint16_t sqeIndex;
  43. uint8_t kernelCredit;
  44. uint8_t reserved4;
  45. // 16-23 bytes
  46. uint32_t stackPhyBaseL;
  47. uint32_t stackPhyBaseH;
  48. // 24-31 bytes
  49. uint16_t totalContextNum;
  50. uint16_t readyContextNum;
  51. uint16_t preloadContextNum;
  52. uint16_t reserved5;
  53. // 32-35 bytes
  54. uint16_t reserved6;
  55. uint16_t prefetchOstNum : 5;
  56. uint16_t reserved9 : 3;
  57. uint16_t cmaintOstNum : 5;
  58. uint16_t reserved10 : 3;
  59. // 36-39 bytes
  60. uint16_t aicPrefetchLower : 5;
  61. uint16_t reserved11 : 3;
  62. uint16_t aicPrefetchUpper : 5;
  63. uint16_t reserved12 : 3;
  64. uint16_t aivPrefetchLower : 5;
  65. uint16_t Reserved13 : 3;
  66. uint16_t aivPrefetchUpper : 5;
  67. uint16_t Reserved14 : 3;
  68. // 40-47 bytes
  69. uint32_t contextAddressBaseL;
  70. uint32_t contextAddressBaseH : 17;
  71. uint32_t reserved15 : 15;
  72. // 48-63 bytes
  73. uint32_t reserved16[4];
  74. } rtFftsPlusSqe_t;
  75. #pragma pack(pop)
  76. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  77. }
  78. #endif
  79. #endif // CCE_RUNTIME_RT_STARS_DEFINE_H

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示：