
cpu_queue_schedule.h (4.3 kB)

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_

#include <cstdint>
#include <vector>

#include "framework/common/ge_inner_error_codes.h"
#include "graph/load/model_manager/task_info/task_info.h"
#include "graph/load/model_manager/zero_copy_offset.h"
#include "runtime/kernel.h"

namespace ge {
// For AICPU task "modelPrepare" / "modelPostpare"
struct AicpuPareInfo {
  uint32_t aicpu_info_size;
  uint32_t model_id;
  uint32_t input_addr_num;
  uint64_t input_addr_list;
  uint64_t input_index_list;
  uint32_t output_addr_num;
  uint64_t output_addr_list;
  uint64_t output_index_list;
  uint32_t output_num;
  uint64_t output_size_list;
  uint32_t in_queue_num;
  uint64_t in_queueid_list;
  uint32_t out_queue_num;
  uint64_t out_queueid_list;
  uint64_t mbufptr_list;
};

///
/// @ingroup ge
/// @brief CpuTask base, inherit from TaskInfo used for manage.
///
class CpuTaskInfo : public TaskInfo {
 public:
  explicit CpuTaskInfo(rtStream_t stream);
  ~CpuTaskInfo() override;

 protected:
  void *args_;
  uint32_t args_size_;
};

class CpuTaskModelPrepare : public CpuTaskInfo {
 public:
  explicit CpuTaskModelPrepare(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskModelPrepare() override;

  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  Status Init(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids,
              const map<uint32_t, ZeroCopyOffset> &inside_addrs, const map<uint32_t, ZeroCopyOffset> &outside_addrs,
              uintptr_t &out_mbuf);
  Status Distribute() override;

 private:
  Status GenerateOutSizeAddr(const map<uint32_t, ZeroCopyOffset> &outside_addrs, void *&output_size_list_addr);
  Status GenerateCpuAddr(const map<uint32_t, ZeroCopyOffset> &node_addrs, void *&data_list_addr, void *&index_list_addr,
                         uint32_t &num);

  void *input_list_addr_ = nullptr;
  void *input_index_list_addr_ = nullptr;
  void *output_list_addr_ = nullptr;
  void *output_index_list_addr_ = nullptr;
  void *output_size_list_addr_ = nullptr;
  void *queue_id_list_addr_ = nullptr;
  void *mbufptr_list_ = nullptr;
};

class CpuTaskModelPostpare : public CpuTaskInfo {
 public:
  explicit CpuTaskModelPostpare(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskModelPostpare() override;

  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  Status Init(uint32_t model_id, const vector<uint32_t> &output_queue_ids, uintptr_t out_mbuf);
  Status Distribute() override;

 private:
  void *queue_id_list_addr_ = nullptr;
};

///
/// @ingroup ge
/// @brief definiteness queue schedule, active entry stream.
///
class CpuTaskActiveEntry : public CpuTaskInfo {
 public:
  explicit CpuTaskActiveEntry(rtStream_t stream) : CpuTaskInfo(stream), active_stream_(nullptr) {}
  ~CpuTaskActiveEntry() override {}

  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  Status Init(rtStream_t stream);
  Status Distribute() override;

 private:
  rtStream_t active_stream_;
};

///
/// @ingroup ge
/// @brief definiteness queue schedule, wait for end graph.
///
class CpuTaskWaitEndGraph : public CpuTaskInfo {
 public:
  explicit CpuTaskWaitEndGraph(rtStream_t stream) : CpuTaskInfo(stream) {}
  ~CpuTaskWaitEndGraph() override {}

  Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
  Status Init(uint32_t model_id);
  Status Distribute() override;
};
}  // namespace ge
#endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_
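Taken together, these classes cover the CPU-side control flow for a queue-bound model: prepare the input data, activate the model's entry stream, wait for the end-of-graph signal, and push the results back to the output queues. The fragment below is a hypothetical sketch (not code from this file) of how loader code in the style of DavinciModel could chain the declared Init/Distribute interfaces; the schedule stream, entry stream, queue IDs and ZeroCopyOffset maps are assumed to be prepared by the surrounding model-load logic.

// Hypothetical sketch: chaining the CPU queue-schedule tasks declared above.
// Only the interfaces from cpu_queue_schedule.h are used; all parameters are
// assumed to be supplied by the caller (e.g. DavinciModel during model load).
Status ScheduleCpuQueueTasks(rtStream_t schedule_stream, rtStream_t entry_stream, uint32_t model_id,
                             const std::vector<uint32_t> &input_queue_ids,
                             const std::vector<uint32_t> &output_queue_ids,
                             const std::map<uint32_t, ZeroCopyOffset> &inside_addrs,
                             const std::map<uint32_t, ZeroCopyOffset> &outside_addrs) {
  uintptr_t out_mbuf = 0U;

  // 1. AICPU "modelPrepare": bind queues and zero-copy addresses, obtain the output mbuf.
  CpuTaskModelPrepare prepare(schedule_stream);
  Status ret = prepare.Init(input_queue_ids, output_queue_ids, inside_addrs, outside_addrs, out_mbuf);
  if (ret != SUCCESS) { return ret; }
  if ((ret = prepare.Distribute()) != SUCCESS) { return ret; }

  // 2. Activate the model's entry stream so its compute tasks start executing.
  CpuTaskActiveEntry active(schedule_stream);
  if ((ret = active.Init(entry_stream)) != SUCCESS) { return ret; }
  if ((ret = active.Distribute()) != SUCCESS) { return ret; }

  // 3. Wait until the model reports end of graph.
  CpuTaskWaitEndGraph wait_end(schedule_stream);
  if ((ret = wait_end.Init(model_id)) != SUCCESS) { return ret; }
  if ((ret = wait_end.Distribute()) != SUCCESS) { return ret; }

  // 4. AICPU "modelPostpare": push the filled output mbuf to the output queues.
  CpuTaskModelPostpare postpare(schedule_stream);
  if ((ret = postpare.Init(model_id, output_queue_ids, out_mbuf)) != SUCCESS) { return ret; }
  return postpare.Distribute();
}

Splitting Init (host-side argument assembly) from Distribute (submitting the task to the stream) presumably lets the loader build all task arguments before anything is queued for execution.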

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the front-end module ME and the underlying hardware and acts as the bridge between them. GE takes the graph delivered by ME as input, applies a series of deep graph optimizations, and finally outputs a graph that can run efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor so as to fully exploit its compute power. During model training/inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture is shown in the diagram below.
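To make the split between GE API and GE Core concrete, the following is a rough, hypothetical sketch of how the GE API layer is typically driven from C++; the header names, option keys, and exact signatures (GEInitialize, Session::AddGraph, Session::RunGraph, GEFinalize) are assumptions based on GraphEngine's public interface and may differ between CANN/MindSpore versions.

// Rough sketch of driving the GE API layer described above (not code from this repository).
// Header names and option keys are assumptions and may differ between versions.
#include <map>
#include <string>
#include <vector>

#include "ge/ge_api.h"    // assumed: GEInitialize / Session / GEFinalize
#include "graph/graph.h"  // assumed: ge::Graph lowered from the front end (e.g. ME)

int main() {
  std::map<std::string, std::string> options;  // global options, e.g. SoC version, run mode
  if (ge::GEInitialize(options) != ge::SUCCESS) {
    return -1;
  }

  {
    ge::Session session(options);   // GE API: a session owns the graphs it compiles
    ge::Graph graph("demo_graph");  // in practice this graph is produced by ME

    // GE Core takes over from here: optimize, compile and load the graph ...
    (void)session.AddGraph(1U, graph);

    std::vector<ge::Tensor> inputs;
    std::vector<ge::Tensor> outputs;
    // ... then execute it on the Ascend device.
    (void)session.RunGraph(1U, inputs, outputs);
  }  // session destroyed before finalize

  (void)ge::GEFinalize();
  return 0;
}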