You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

CMakeLists.txt 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. # Copyright 2019-2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. # libge_compiler.so & libge_runner.so
  16. # will later be integrated into libgraph_runner.so, works for both training and inference
  # Sources generated from the .proto files emit unused-variable warnings, so
  # -Wno-unused-variable is placed in front of the C++ flags already in effect.
  set(CMAKE_CXX_FLAGS "-Wno-unused-variable ${CMAKE_CXX_FLAGS}")

  # The three lists below are collected with file(GLOB ... RELATIVE), so each
  # entry is expressed relative to this file's directory and an entry that does
  # not exist on disk is left out of the resulting list.

  # Protos whose generated sources are compiled into both ge_runner and
  # ge_compiler (via PROTO_SRCS).
  file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
    "../proto/fusion_model.proto"
    "../proto/optimizer_priority.proto"
  )

  # Client API proto; its generated sources are only added to ge_runner
  # (via PROTO_CLIENT_SRCS).
  file(GLOB PROTO_CLIENT_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
    "../proto/ge_api.proto"
  )

  # Protos for which only the generated headers (PROTO_HEADER_HDRS) are attached
  # to the two libraries in this file.
  file(GLOB PROTO_HEADER_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
    "../proto/om.proto"
    "../proto/task.proto"
    "../proto/insert_op.proto"
    "../proto/ge_ir.proto"
    "../proto/fwk_adapter.proto"
    "../proto/op_mapping_info.proto"
    "../proto/dump_task.proto"
  )

  # NOTE(review): protobuf_generate() is defined outside this file. Judging by
  # these calls it takes a component name (`ge`), the names of the output
  # variables for generated sources and headers, then the .proto inputs --
  # confirm against its definition.
  protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
  protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST})
  protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})
  # Header search paths. They are registered at directory scope, so they apply
  # to the targets created in this directory. The loop issues exactly one
  # include_directories() call per entry, in the order listed.
  foreach(ge_include_dir IN ITEMS
      # this directory and the GE source tree
      ${CMAKE_CURRENT_LIST_DIR}
      ${GE_SOURCE_DIR}
      ${GE_SOURCE_DIR}/src
      ${GE_SOURCE_DIR}/src/ge/analyzer
      # GE public / framework headers
      ${GE_SOURCE_DIR}/inc
      ${GE_SOURCE_DIR}/inc/common/util
      ${GE_SOURCE_DIR}/inc/external
      ${GE_SOURCE_DIR}/inc/external/graph
      ${GE_SOURCE_DIR}/inc/framework
      ${GE_SOURCE_DIR}/inc/framework/common
      ${GE_SOURCE_DIR}/inc/graph
      # third-party fwkacllib headers
      ${GE_SOURCE_DIR}/third_party/fwkacllib
      ${GE_SOURCE_DIR}/third_party/fwkacllib/inc
      ${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce
      ${GE_SOURCE_DIR}/third_party/fwkacllib/inc/toolchain
      # build tree; proto/ge is presumably where the generated protobuf
      # headers are written -- TODO confirm against protobuf_generate()
      ${CMAKE_BINARY_DIR}
      ${CMAKE_BINARY_DIR}/proto/ge
  )
    include_directories(${ge_include_dir})
  endforeach()
  ######### libge_runner.so #############
  # Source list for the ge_runner shared library.
  # need to remove dependencies on pb files later
  #
  # The list is collected with file(GLOB ... RELATIVE), so:
  #   * entries are relative to this file's directory;
  #   * "*.cc" patterns are not recursive (sub-directories are listed separately);
  #   * an entry that matches nothing on disk is dropped without any diagnostic.
  # Every pattern appears exactly once.
  file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
    # analyzer / client API
    "analyzer/analyzer.cc"
    "client/ge_prof.cc"
    "client/ge_api.cc"
    # common
    "common/dump/dump_manager.cc"
    "common/dump/dump_properties.cc"
    "common/dump/dump_op.cc"
    "common/formats/format_transfers/*.cc"
    "common/formats/formats.cc"
    "common/formats/utils/formats_trans_utils.cc"
    "common/fp16_t.cc"
    "common/ge/op_tiling_manager.cc"
    "common/ge/plugin_manager.cc"
    "common/helper/model_cache_helper.cc"
    "common/profiling/profiling_manager.cc"
    # engines / executor / generator
    "engine_manager/dnnengine_manager.cc"
    "executor/ge_executor.cc"
    "ge_local_engine/engine/host_cpu_engine.cc"
    "generator/ge_generator.cc"
    "generator/generator_api.cc"
    # graph: build / common / execute / label
    "graph/build/*.cc"
    "graph/common/*.cc"
    "graph/execute/graph_execute.cc"
    "graph/label/*.cc"
    # graph: model loading (task_info files are listed one by one)
    "graph/load/graph_loader.cc"
    "graph/load/new_model_manager/*.cc"
    "graph/load/new_model_manager/task_info/end_graph_task_info.cc"
    "graph/load/new_model_manager/task_info/event_record_task_info.cc"
    "graph/load/new_model_manager/task_info/event_wait_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
    "graph/load/new_model_manager/task_info/hccl_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_task_info.cc"
    "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/label_set_task_info.cc"
    "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
    "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_active_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "graph/load/new_model_manager/task_info/task_info.cc"
    # graph: manager
    "graph/manager/graph_context.cc"
    "graph/manager/graph_manager.cc"
    "graph/manager/graph_manager_utils.cc"
    "graph/manager/graph_mem_allocator.cc"
    "graph/manager/graph_caching_allocator.cc"
    "graph/manager/graph_var_manager.cc"
    "graph/manager/model_manager/event_manager.cc"
    "graph/manager/rdma_pool_allocator.cc"
    "graph/manager/trans_var_data_utils.cc"
    "graph/manager/util/debug.cc"
    "graph/manager/util/hcom_util.cc"
    "graph/manager/util/rt_context_util.cc"
    "graph/manager/util/variable_accelerate_ctrl.cc"
    # graph: optimize / partition / passes / preprocess
    "graph/optimize/graph_optimize.cc"
    "graph/optimize/mem_rw_conflict_optimize.cc"
    "graph/optimize/optimizer/allreduce_fusion_pass.cc"
    "graph/optimize/summary_optimize.cc"
    "graph/partition/dynamic_shape_partition.cc"
    "graph/partition/engine_place.cc"
    "graph/partition/graph_partition.cc"
    "graph/passes/*.cc"
    "graph/preprocess/graph_preprocess.cc"
    "graph/preprocess/insert_op/ge_aipp_op.cc"
    "graph/preprocess/insert_op/util_insert_aipp_op.cc"
    "graph/preprocess/multi_batch_copy_graph.cc"
    "graph/preprocess/multi_batch_options.cc"
    # host kernels
    "host_kernels/add_kernel.cc"
    "host_kernels/broadcast_args_kernel.cc"
    "host_kernels/broadcast_gradient_args_kernel.cc"
    "host_kernels/cast_kernel.cc"
    "host_kernels/concat_offset_kernel.cc"
    "host_kernels/concat_v2_kernel.cc"
    "host_kernels/dynamic_stitch_kernel.cc"
    "host_kernels/empty_kernel.cc"
    "host_kernels/expanddims_kernel.cc"
    "host_kernels/fill_kernel.cc"
    "host_kernels/floordiv_kernel.cc"
    "host_kernels/floormod_kernel.cc"
    "host_kernels/gather_v2_kernel.cc"
    "host_kernels/greater_kernel.cc"
    "host_kernels/identity_kernel.cc"
    "host_kernels/kernel_utils.cc"
    "host_kernels/maximum_kernel.cc"
    "host_kernels/mul_kernel.cc"
    "host_kernels/pack_kernel.cc"
    "host_kernels/permute_kernel.cc"
    "host_kernels/range_kernel.cc"
    "host_kernels/rank_kernel.cc"
    "host_kernels/reduce_prod_kernel.cc"
    "host_kernels/reshape_kernel.cc"
    "host_kernels/rsqrt_kernel.cc"
    "host_kernels/shape_kernel.cc"
    "host_kernels/shape_n_kernel.cc"
    "host_kernels/size_kernel.cc"
    "host_kernels/slice_d_kernel.cc"
    "host_kernels/slice_kernel.cc"
    "host_kernels/squeeze_kernel.cc"
    "host_kernels/ssd_prior_box_kernel.cc"
    "host_kernels/strided_slice_kernel.cc"
    "host_kernels/sub_kernel.cc"
    "host_kernels/transdata_kernel.cc"
    "host_kernels/transpose_kernel.cc"
    "host_kernels/unpack_kernel.cc"
    "host_kernels/unsqueeze_kernel.cc"
    # hybrid execution
    "hybrid/common/npu_memory_allocator.cc"
    "hybrid/common/tensor_value.cc"
    "hybrid/executor/*.cc"
    "hybrid/executor/worker/*.cc"
    "hybrid/hybrid_davinci_model.cc"
    "hybrid/model/*.cc"
    "hybrid/node_executor/aicore/*.cc"
    "hybrid/node_executor/aicpu/aicpu_ext_info.cc"
    "hybrid/node_executor/aicpu/aicpu_node_executor.cc"
    "hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
    "hybrid/node_executor/controlop/control_op_executor.cc"
    "hybrid/node_executor/ge_local/ge_local_node_executor.cc"
    "hybrid/node_executor/hccl/hccl_node_executor.cc"
    # NOTE(review): both `hostcpu/` and `host_cpu/` are referenced below; a
    # pattern that matches nothing is silently ignored, so confirm both exist.
    "hybrid/node_executor/hostcpu/ge_local_node_executor.cc"
    "hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
    "hybrid/node_executor/host_cpu/kernel_factory.cc"
    "hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
    "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
    "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
    "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
    "hybrid/node_executor/node_executor.cc"
    "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
    "hybrid/node_executor/rts/rts_node_executor.cc"
    "hybrid/node_executor/task_context.cc"
    # init / model / misc managers / session / single op
    "init/gelib.cc"
    "model/ge_model.cc"
    "model/ge_root_model.cc"
    "omm/csa_interact.cc"
    "opskernel_manager/ops_kernel_manager.cc"
    "session/inner_session.cc"
    "session/session_manager.cc"
    "single_op/*.cc"
    "single_op/task/*.cc"
  )
  ######### libge_runner.so #############
  # Shared library assembled from the runner source list, the generated sources
  # of the common and client protos, and the generated headers of the
  # header-only proto group.
  add_library(ge_runner SHARED
    ${TRAIN_SRC_LIST}
    ${PROTO_SRCS}
    ${PROTO_CLIENT_SRCS}
    ${PROTO_HEADER_HDRS}
  )

  # Preprocessor definitions applied only while compiling ge_runner itself.
  target_compile_definitions(ge_runner PRIVATE
    PROTOBUF_INLINE_NOT_IN_HEADERS=0
    DAVINCI_SUPPORT_PROFILING
    REUSE_MEMORY=1
    DAVINCI_CLOUD
  )

  # Link dependencies; the order is kept exactly as is. The keyword-less
  # signature is retained on purpose: switching to PRIVATE/PUBLIC would change
  # how these libraries propagate to consumers of ge_runner.
  # `protobuf` is linked by name (not through ${PROTOBUF_LIBRARY}).
  # NOTE(review): ${register}, ${c_sec}, ${slog}, ... are variables set outside
  # this file (presumably located library paths); `resouce` is spelled as the
  # variable is referenced here -- confirm it matches its definition.
  target_link_libraries(ge_runner
    graph
    ge_common
    ge_memory
    protobuf
    ${register}
    ${c_sec}
    ${slog}
    ${mmpa}
    ${hccl}
    ${msprof}
    ${runtime}
    ${resouce}
    ${ascend_hal}
    ${adump_server}
    ${msprofiler}
    rt
    dl
  )
  ######### libge_compiler.so #############
  # Source list for the ge_compiler shared library.
  # need to remove dependencies on pb files later
  #
  # Collected with file(GLOB ... RELATIVE): entries are relative to this file's
  # directory, "*.cc" patterns do not descend into sub-directories, and an entry
  # that matches nothing on disk is dropped without any diagnostic.
  file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
    # analyzer
    "analyzer/analyzer.cc"
    # common
    "common/dump/dump_properties.cc"
    "common/dump/dump_manager.cc"
    "common/dump/dump_op.cc"
    "common/dump/dump_server.cc"
    "common/formats/format_transfers/*.cc"
    "common/formats/formats.cc"
    "common/formats/utils/formats_trans_utils.cc"
    "common/fp16_t.cc"
    "common/ge/op_tiling_manager.cc"
    "common/ge/plugin_manager.cc"
    "common/helper/model_cache_helper.cc"
    "common/profiling/profiling_manager.cc"
    # engines / generator
    "engine_manager/dnnengine_manager.cc"
    "ge_local_engine/engine/host_cpu_engine.cc"
    "generator/ge_generator.cc"
    "generator/generator_api.cc"
    # graph: build / common / execute / label
    "graph/build/*.cc"
    "graph/common/*.cc"
    "graph/execute/graph_execute.cc"
    "graph/label/*.cc"
    # graph: model loading (task_info files are listed one by one)
    "graph/load/graph_loader.cc"
    "graph/load/new_model_manager/*.cc"
    "graph/load/new_model_manager/task_info/end_graph_task_info.cc"
    "graph/load/new_model_manager/task_info/event_record_task_info.cc"
    "graph/load/new_model_manager/task_info/event_wait_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
    "graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/kernel_task_info.cc"
    "graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
    "graph/load/new_model_manager/task_info/label_set_task_info.cc"
    "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
    "graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
    "graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_active_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
    "graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
    "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
    "graph/load/new_model_manager/task_info/task_info.cc"
    # graph: manager
    "graph/manager/graph_caching_allocator.cc"
    "graph/manager/graph_context.cc"
    "graph/manager/graph_manager.cc"
    "graph/manager/graph_manager_utils.cc"
    "graph/manager/graph_mem_allocator.cc"
    "graph/manager/trans_var_data_utils.cc"
    "graph/manager/graph_var_manager.cc"
    "graph/manager/model_manager/event_manager.cc"
    "graph/manager/rdma_pool_allocator.cc"
    "graph/manager/util/debug.cc"
    "graph/manager/util/rt_context_util.cc"
    "graph/manager/util/variable_accelerate_ctrl.cc"
    # graph: optimize / partition / passes / preprocess
    "graph/optimize/graph_optimize.cc"
    "graph/optimize/mem_rw_conflict_optimize.cc"
    "graph/optimize/summary_optimize.cc"
    "graph/partition/dynamic_shape_partition.cc"
    "graph/partition/engine_place.cc"
    "graph/partition/graph_partition.cc"
    "graph/passes/*.cc"
    "graph/preprocess/graph_preprocess.cc"
    "graph/preprocess/insert_op/ge_aipp_op.cc"
    "graph/preprocess/insert_op/util_insert_aipp_op.cc"
    "graph/preprocess/multi_batch_copy_graph.cc"
    "graph/preprocess/multi_batch_options.cc"
    # host kernels
    "host_kernels/add_kernel.cc"
    "host_kernels/broadcast_args_kernel.cc"
    "host_kernels/broadcast_gradient_args_kernel.cc"
    "host_kernels/cast_kernel.cc"
    "host_kernels/concat_offset_kernel.cc"
    "host_kernels/concat_v2_kernel.cc"
    "host_kernels/dynamic_stitch_kernel.cc"
    "host_kernels/empty_kernel.cc"
    "host_kernels/expanddims_kernel.cc"
    "host_kernels/fill_kernel.cc"
    "host_kernels/floordiv_kernel.cc"
    "host_kernels/floormod_kernel.cc"
    "host_kernels/gather_v2_kernel.cc"
    "host_kernels/greater_kernel.cc"
    "host_kernels/identity_kernel.cc"
    "host_kernels/kernel_utils.cc"
    "host_kernels/maximum_kernel.cc"
    "host_kernels/mul_kernel.cc"
    "host_kernels/pack_kernel.cc"
    "host_kernels/permute_kernel.cc"
    "host_kernels/range_kernel.cc"
    "host_kernels/rank_kernel.cc"
    "host_kernels/reduce_prod_kernel.cc"
    "host_kernels/reshape_kernel.cc"
    "host_kernels/rsqrt_kernel.cc"
    "host_kernels/shape_kernel.cc"
    "host_kernels/shape_n_kernel.cc"
    "host_kernels/size_kernel.cc"
    "host_kernels/slice_d_kernel.cc"
    "host_kernels/slice_kernel.cc"
    "host_kernels/squeeze_kernel.cc"
    "host_kernels/ssd_prior_box_kernel.cc"
    "host_kernels/strided_slice_kernel.cc"
    "host_kernels/sub_kernel.cc"
    "host_kernels/transdata_kernel.cc"
    "host_kernels/transpose_kernel.cc"
    "host_kernels/unpack_kernel.cc"
    "host_kernels/unsqueeze_kernel.cc"
    # hybrid: only the model stub and the aicpu ext-info helper are listed here
    "hybrid/hybrid_davinci_model_stub.cc"
    "hybrid/node_executor/aicpu/aicpu_ext_info.cc"
    # init / IR build / model / misc managers / session / single op
    "init/gelib.cc"
    "ir_build/atc_ir_common.cc"
    "ir_build/ge_ir_build.cc"
    "model/ge_model.cc"
    "model/ge_root_model.cc"
    "omm/csa_interact.cc"
    "opskernel_manager/ops_kernel_manager.cc"
    "session/inner_session.cc"
    "session/session_manager.cc"
    "single_op/*.cc"
    "single_op/task/*.cc"
  )
  # Shared library assembled from the compiler source list, the generated
  # sources of the common protos, and the generated headers of the header-only
  # proto group.
  add_library(ge_compiler SHARED
    ${INFER_SRC_LIST}
    ${PROTO_SRCS}
    ${PROTO_HEADER_HDRS}
  )

  # Preprocessor definitions applied only while compiling ge_compiler itself.
  # Each definition is listed once.
  target_compile_definitions(ge_compiler PRIVATE
    PROTOBUF_INLINE_NOT_IN_HEADERS=0
    REUSE_MEMORY=1
    FMK_HOST_INFER
    FMK_SUPPORT_DUMP
    COMPILE_OMG_PACKAGE
  )

  # Link dependencies; the order is kept exactly as is. The keyword-less
  # signature is retained on purpose: switching to PRIVATE/PUBLIC would change
  # how these libraries propagate to consumers of ge_compiler.
  # `protobuf` is linked by name (not through ${PROTOBUF_LIBRARY}).
  # NOTE(review): ${register}, ${c_sec}, ${slog}, ... are variables set outside
  # this file (presumably located library paths); `resouce` is spelled as the
  # variable is referenced here -- confirm it matches its definition.
  target_link_libraries(ge_compiler
    graph
    ge_common
    ge_memory
    protobuf
    ${register}
    ${c_sec}
    ${slog}
    ${mmpa}
    ${msprof}
    ${runtime}
    ${resouce}
    ${error_manager}
    rt
    dl
  )

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，进行一系列深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点做了特定的优化，以充分发挥昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。