You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

rt_stars_define.h 2.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef CCE_RUNTIME_RT_STARS_DEFINE_H
  17. #define CCE_RUNTIME_RT_STARS_DEFINE_H
  18. #include "base.h"
  19. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  20. extern "C" {
  21. #endif
  // Save the current packing setting, then request 1-byte packing for the struct
  // definitions that follow; the saved setting is restored by the matching
  // #pragma pack(pop) after rtFftsPlusSqe_t.
  22. #pragma pack(push)
  23. #pragma pack (1)
  // Header embedded as the first member of rtFftsPlusSqe_t, where it is labelled as bytes 0-7.
  // Layout as declared: two groups of uint8_t bit-fields (6+1+1 bits and 2+2+2+1+1 bits),
  // followed by three uint16_t fields. Declared while #pragma pack(1) is in effect.
  // NOTE(review): uint8_t bit-fields and the order in which bit-fields are allocated within
  // a byte are implementation-defined in C — confirm the compiler's layout matches the
  // layout the consumer of this structure expects.
  24. typedef struct tagStarsSqeHeader {
  25. uint8_t type : 6; // 6 bits; first bit-field group (8 bits in total with the next two fields)
  26. uint8_t l1Lock : 1; // 1 bit
  27. uint8_t l1Unlock : 1; // 1 bit
  28. uint8_t ie : 2; // 2 bits; second bit-field group starts here (8 bits in total)
  29. uint8_t preP : 2; // 2 bits
  30. uint8_t postP : 2; // 2 bits
  31. uint8_t wrCqe : 1; // 1 bit
  32. uint8_t reserved : 1; // 1 bit, reserved
  33. uint16_t blockDim; // 16-bit field
  34. uint16_t rtStreamId; // 16-bit field; presumably the runtime stream id — TODO confirm
  35. uint16_t taskId; // 16-bit field; presumably the task id — TODO confirm
  36. } rtStarsSqeHeader_t;
  37. // FFTS+ type codes.
  // Only the values 2, 3 and 4 are defined here; 0 and 1 have no enumerator in this header.
  // NOTE(review): presumably these are the values written to rtFftsPlusSqe_t::fftsType
  // (a 3-bit field, so 2..4 fit) — confirm against the caller.
  38. typedef enum tagFftsPlusType {
  39. RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved
  40. RT_FFTS_PLUS_TYPE_RES2 = 3, // Reserved
  41. RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode
  42. } rtFftsPlusType_t;
  43. // FFTS+ SQE.
  // The byte-range comments below cover offsets 0-63, i.e. 64 bytes in total; the struct is
  // declared while #pragma pack(1) is in effect.
  // NOTE(review): bit-field allocation order is implementation-defined in C — confirm the
  // compiler's layout matches the layout the consumer of this structure expects.
  // NOTE(review): reserved-field numbering skips 3, 7 and 8, and Reserved13/Reserved14 are
  // capitalised unlike the other reserved fields. Names are left as-is because member names
  // are part of this header's interface.
  44. typedef struct tagFftsPlusSqe {
  45. // bytes 0-7: common SQE header
  46. rtStarsSqeHeader_t sqeHeader;
  47. // bytes 8-11: 3+9+4 bits of bit-fields (16 bits) followed by a 16-bit reserved field
  48. uint16_t fftsType : 3; // 3 bits; presumably holds an rtFftsPlusType_t value — TODO confirm
  49. uint16_t reserved1 : 9; // 9 bits, reserved
  50. uint16_t wrrRatio : 4; // 4 bits
  51. uint16_t reserved2; // 16 bits, reserved
  52. // bytes 12-15
  53. uint16_t sqeIndex; // 16-bit field
  54. uint8_t kernelCredit; // 8-bit field
  55. uint8_t reserved4; // 8 bits, reserved
  56. // bytes 16-23: two 32-bit fields
  57. uint32_t stackPhyBaseL; // presumably the low 32 bits of the stack physical base — TODO confirm
  58. uint32_t stackPhyBaseH; // presumably the high 32 bits of the stack physical base — TODO confirm
  59. // bytes 24-31: three 16-bit context counters and a 16-bit reserved field
  60. uint16_t totalContextNum;
  61. uint16_t readyContextNum;
  62. uint16_t preloadContextNum;
  63. uint16_t reserved5; // 16 bits, reserved
  64. // bytes 32-35: a 16-bit reserved field followed by 5+3+5+3 bits of bit-fields (16 bits)
  65. uint16_t reserved6; // 16 bits, reserved
  66. uint16_t prefetchOstNum : 5; // 5 bits
  67. uint16_t reserved9 : 3; // 3 bits, reserved
  68. uint16_t cmaintOstNum : 5; // 5 bits
  69. uint16_t reserved10 : 3; // 3 bits, reserved
  70. // bytes 36-39: four 5-bit values, each followed by 3 reserved bits (32 bits in total)
  71. uint16_t aicPrefetchLower : 5; // 5 bits
  72. uint16_t reserved11 : 3; // 3 bits, reserved
  73. uint16_t aicPrefetchUpper : 5; // 5 bits
  74. uint16_t reserved12 : 3; // 3 bits, reserved
  75. uint16_t aivPrefetchLower : 5; // 5 bits
  76. uint16_t Reserved13 : 3; // 3 bits, reserved (note the capitalised name)
  77. uint16_t aivPrefetchUpper : 5; // 5 bits
  78. uint16_t Reserved14 : 3; // 3 bits, reserved (note the capitalised name)
  79. // bytes 40-47: a full 32-bit field, then a 17-bit field padded with 15 reserved bits
  80. uint32_t contextAddressBaseL; // presumably the low 32 bits of the context base address — TODO confirm
  81. uint32_t contextAddressBaseH : 17; // 17 bits; presumably the high bits of the context base address — TODO confirm
  82. uint32_t reserved15 : 15; // 15 bits, reserved
  83. // bytes 48-63: 16 reserved bytes
  84. uint32_t reserved16[4];
  85. } rtFftsPlusSqe_t;
  86. #pragma pack(pop)
  87. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  88. }
  89. #endif
  90. #endif // CCE_RUNTIME_RT_STARS_DEFINE_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。