You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

ts_mem_mall.h 3.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_LOAD_TS_MEM_MALL_H_
  17. #define GE_GRAPH_LOAD_TS_MEM_MALL_H_
  18. #include <mutex>
  19. #include <unordered_map>
  20. #include <memory>
  21. #include "runtime/base.h"
  22. #include "framework/common/debug/ge_log.h"
  23. #define TS_MEM_ALIGNMENT 64
  24. #define TS_MEM_ALIGN_MASK (TS_MEM_ALIGNMENT - 1)
  25. #define TS_MEM_ALIGN_SIZE(size) (((size) + TS_MEM_ALIGN_MASK) & ~TS_MEM_ALIGN_MASK)
  26. namespace ge {
  27. constexpr uint32_t kMaxTsMemBlock = 2 * 1024 * 1024; // Max block 2M.
  28. class TsMemMall {
  29. public:
  30. TsMemMall() = default;
  31. ~TsMemMall() {
  32. for (auto it : mem_store_size_) {
  33. rtError_t ret = rtFree(it.second);
  34. if (ret != RT_ERROR_NONE) {
  35. GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret);
  36. }
  37. }
  38. mem_store_size_.clear();
  39. mem_store_addr_.clear();
  40. }
  41. void *Acquire(int64_t offset, uint64_t size) {
  42. if (size == 0) {
  43. GELOGE(RT_FAILED, "Acquire mem block failed, size: %lu", size);
  44. return nullptr;
  45. }
  46. uint64_t bytes = TS_MEM_ALIGN_SIZE(size);
  47. if (bytes > kMaxTsMemBlock) {
  48. GELOGW("Acquire TS memory may not physical continuity, size: %lu", bytes);
  49. }
  50. std::lock_guard<std::mutex> lock(mem_mutex_);
  51. const auto it = mem_store_size_.find(offset);
  52. if (it != mem_store_size_.end()) {
  53. GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", it->second, offset, size, bytes);
  54. return it->second;
  55. }
  56. void *addr = nullptr;
  57. rtError_t rt_ret = rtMalloc(&addr, bytes, RT_MEMORY_TS_4G);
  58. if (rt_ret != RT_ERROR_NONE) {
  59. GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
  60. return nullptr;
  61. }
  62. GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", addr, offset, size, bytes);
  63. mem_store_size_[offset] = addr;
  64. mem_store_addr_[addr] = offset;
  65. return addr;
  66. }
  67. void Release(void *addr) {
  68. std::lock_guard<std::mutex> lock(mem_mutex_);
  69. const auto it = mem_store_addr_.find(addr);
  70. if (it == mem_store_addr_.end()) {
  71. GELOGW("Not TS memory: %p.", addr);
  72. return;
  73. }
  74. GELOGI("Release TS memory: %p.", addr);
  75. mem_store_size_.erase(it->second);
  76. mem_store_addr_.erase(it);
  77. rtError_t ret = rtFree(addr);
  78. if (ret != RT_ERROR_NONE) {
  79. GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret);
  80. }
  81. }
  82. private:
  83. std::mutex mem_mutex_;
  84. std::unordered_map<int64_t, void *> mem_store_size_;
  85. std::unordered_map<void *, int64_t> mem_store_addr_;
  86. };
  87. } // namespace ge
  88. #endif // GE_GRAPH_LOAD_TS_MEM_MALL_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。