From e91dae123961c6d655f1985d296ee6943a532fd6 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Tue, 13 Oct 2020 17:39:18 +0800 Subject: [PATCH] update code from yellow zone 20201013 --- ge/analyzer/analyzer.cc | 12 +- ge/analyzer/analyzer.h | 2 +- ge/client/ge_api.cc | 4 + ge/client/ge_prof.cc | 25 +- ge/common/auth/file_saver.cc | 2 +- ge/common/auth/file_saver.h | 2 +- ge/common/base64.h | 2 +- ge/common/context/ctx.cc | 2 +- ge/common/cust_aicpu_kernel_store.cc | 2 +- ge/common/cust_aicpu_kernel_store.h | 2 +- ge/common/debug/memory_dumper.cc | 2 +- ge/common/debug/memory_dumper.h | 2 +- ge/common/dump/dump_manager.cc | 2 +- ge/common/dump/dump_manager.h | 2 +- ge/common/dump/dump_op.cc | 20 +- ge/common/dump/dump_op.h | 2 +- ge/common/dump/dump_properties.cc | 2 +- ge/common/dump/dump_properties.h | 2 +- ge/common/dump/dump_server.cc | 2 +- ge/common/fmk_error_codes.cc | 2 +- .../format_transfers/datatype_transfer.cc | 2 +- .../format_transfers/datatype_transfer.h | 2 +- .../format_transfer_c1hwncoc0_hwcn.cc | 2 +- .../format_transfer_c1hwncoc0_hwcn.h | 2 +- .../format_transfer_dhwcn_fracz3D.cc | 3 +- .../format_transfer_dhwcn_fracz3D.h | 3 +- ...format_transfer_dhwnc_fracz3D_transpose.cc | 3 +- .../format_transfer_dhwnc_fracz3D_transpose.h | 3 +- .../format_transfer_fractal_nz.cc | 2 +- .../format_transfer_fractal_nz.h | 2 +- .../format_transfer_fractal_z.cc | 2 +- .../format_transfer_fractal_z.h | 2 +- .../format_transfer_fractal_zz.cc | 2 +- .../format_transfer_fractal_zz.h | 2 +- .../format_transfer_fracz_hwcn.cc | 2 +- .../format_transfer_fracz_hwcn.h | 2 +- .../format_transfer_fracz_nchw.cc | 2 +- .../format_transfer_fracz_nchw.h | 2 +- .../format_transfer_fracz_nhwc.cc | 2 +- .../format_transfer_fracz_nhwc.h | 2 +- .../format_transfer_hwcn_c1hwncoc0.cc | 2 +- .../format_transfer_hwcn_c1hwncoc0.h | 2 +- .../format_transfer_nc1hwc0_nchw.cc | 2 +- .../format_transfer_nc1hwc0_nchw.h | 2 +- .../format_transfer_nc1hwc0_nhwc.cc | 2 +- 
.../format_transfer_nc1hwc0_nhwc.h | 2 +- .../format_transfer_nchw_fz_c04.cc | 2 +- .../format_transfer_nchw_fz_c04.h | 2 +- .../format_transfer_nchw_nc1hwc0.cc | 2 +- .../format_transfer_nchw_nc1hwc0.h | 2 +- .../format_transfer_nhwc_nc1hwc0.cc | 2 +- .../format_transfer_nhwc_nc1hwc0.h | 2 +- .../format_transfer_transpose.cc | 2 +- .../format_transfer_transpose.h | 2 +- ge/common/formats/formats.cc | 2 +- ge/common/formats/formats.h | 2 +- ge/common/formats/utils/formats_definitions.h | 2 +- .../formats/utils/formats_trans_utils.cc | 2 +- ge/common/formats/utils/formats_trans_utils.h | 2 +- ge/common/fp16_t.cc | 2 +- ge/common/fp16_t.h | 2 +- ge/common/ge/datatype_util.cc | 4 +- ge/common/ge/datatype_util.h | 2 +- ge/common/ge/ge_util.h | 2 +- ge/common/ge/op_tiling_manager.cc | 2 +- ge/common/ge/op_tiling_manager.h | 2 +- ge/common/ge/plugin_manager.cc | 2 +- ge/common/ge/plugin_manager.h | 2 +- ge/common/ge/tbe_plugin_manager.cc | 2 +- ge/common/ge/tbe_plugin_manager.h | 2 +- ge/common/helper/model_cache_helper.cc | 1 - ge/common/helper/model_helper.cc | 2 +- ge/common/helper/om_file_helper.cc | 2 +- ge/common/kernel_store.cc | 2 +- ge/common/kernel_store.h | 2 +- ge/common/math/fp16_math.cc | 2 +- ge/common/math/fp16_math.h | 2 +- ge/common/math/math_util.h | 2 +- ge/common/math_util.h | 2 +- ge/common/model_parser/base.cc | 2 +- ge/common/model_parser/base.h | 2 +- ge/common/model_saver.cc | 2 +- ge/common/model_saver.h | 2 +- ge/common/op/attr_value_util.cc | 2 +- ge/common/op/ge_op_utils.cc | 2 +- ge/common/profiling/profiling_manager.cc | 36 +- ge/common/profiling/profiling_manager.h | 4 +- ge/common/properties_manager.cc | 2 +- ge/common/properties_manager.h | 2 +- ge/common/singleton.h | 3 +- ge/common/tbe_kernel_store.cc | 2 +- ge/common/tbe_kernel_store.h | 2 +- ge/common/thread_pool.cc | 2 +- ge/common/thread_pool.h | 2 +- ge/common/types.cc | 3 +- ge/common/util.cc | 4 +- ge/executor/ge_executor.cc | 2 +- ge/executor/module.mk | 11 + 
ge/ge_inference.mk | 6 +- ge/ge_local_engine/common/constant/constant.h | 2 +- ge/ge_local_engine/engine/ge_local_engine.cc | 2 +- ge/ge_local_engine/engine/ge_local_engine.h | 2 +- ge/ge_local_engine/engine/host_cpu_engine.cc | 55 +- ge/ge_local_engine/engine/host_cpu_engine.h | 5 +- ge/ge_local_engine/module.mk | 22 +- .../ge_local_ops_kernel_builder.cc | 2 +- .../ge_local_ops_kernel_builder.h | 2 +- .../ge_local_ops_kernel_info.cc | 2 +- .../ge_local_ops_kernel_info.h | 2 +- .../ops_kernel_store/op/ge_deleted_op.cc | 2 +- .../ops_kernel_store/op/ge_deleted_op.h | 2 +- .../ops_kernel_store/op/no_op.cc | 2 +- .../ops_kernel_store/op/no_op.h | 2 +- ge/ge_local_engine/ops_kernel_store/op/op.cc | 2 +- ge/ge_local_engine/ops_kernel_store/op/op.h | 2 +- .../ops_kernel_store/op/op_factory.cc | 2 +- .../ops_kernel_store/op/op_factory.h | 2 +- ge/ge_runner.mk | 3 + ge/ge_runtime/model_context.h | 11 +- ge/ge_runtime/model_runner.cc | 3 +- ge/ge_runtime/output.cc | 5 +- ge/ge_runtime/output.h | 6 +- ge/ge_runtime/runtime_model.cc | 55 +- ge/ge_runtime/runtime_model.h | 11 +- ge/ge_runtime/task/aicpu_task.cc | 52 +- ge/ge_runtime/task/aicpu_task.h | 1 + ge/ge_runtime/task/cce_task.cc | 15 +- ge/ge_runtime/task/event_record_task.h | 2 +- ge/ge_runtime/task/event_wait_task.cc | 2 +- ge/ge_runtime/task/event_wait_task.h | 2 +- ge/ge_runtime/task/hccl_task.cc | 3 - ge/ge_runtime/task/label_goto_task.cc | 70 ++ .../ge_runtime/task/label_goto_task.h | 32 +- ge/ge_runtime/task/label_set_task.cc | 70 ++ .../ge_runtime/task/label_set_task.h | 31 +- ge/ge_runtime/task/label_switch_task.cc | 131 +++ ge/ge_runtime/task/label_switch_task.h | 44 + ge/ge_runtime/task/stream_switch_task.cc | 2 +- ge/ge_runtime/task/stream_switch_task.h | 1 + ge/ge_runtime/task/task.h | 2 +- ge/ge_runtime/task/task_factory.h | 1 + ge/generator/ge_generator.cc | 8 +- ge/generator/generator_api.cc | 3 +- ge/graph/build/graph_builder.cc | 23 +- ge/graph/build/graph_builder.h | 2 +- 
ge/graph/build/label_allocator.cc | 2 +- ge/graph/build/label_allocator.h | 2 +- ge/graph/build/logical_stream_allocator.cc | 2 +- ge/graph/build/logical_stream_allocator.h | 2 +- .../build/memory/binary_block_mem_assigner.cc | 3 +- .../build/memory/binary_block_mem_assigner.h | 2 +- ge/graph/build/memory/block_mem_assigner.cc | 211 ++++- ge/graph/build/memory/block_mem_assigner.h | 64 +- ge/graph/build/memory/graph_mem_assigner.cc | 415 ++++++--- ge/graph/build/memory/graph_mem_assigner.h | 25 +- ge/graph/build/memory/hybrid_mem_assigner.cc | 5 +- ge/graph/build/memory/hybrid_mem_assigner.h | 4 +- .../build/memory/max_block_mem_assigner.cc | 2 +- .../build/memory/max_block_mem_assigner.h | 2 +- ge/graph/build/memory/mem_assigner.h | 2 +- ge/graph/build/memory/memory_assigner.cc | 4 +- ge/graph/build/memory/var_mem_assign_util.cc | 2 +- ge/graph/build/memory/var_mem_assign_util.h | 2 +- ge/graph/build/model_builder.cc | 18 +- ge/graph/build/model_builder.h | 5 +- ge/graph/build/run_context.cc | 39 +- ge/graph/build/run_context.h | 10 +- ge/graph/build/stream_allocator.cc | 2 +- ge/graph/build/stream_allocator.h | 2 +- ge/graph/build/stream_graph_optimizer.cc | 3 +- ge/graph/build/stream_graph_optimizer.h | 2 +- ge/graph/build/task_generator.cc | 2 +- ge/graph/build/task_generator.h | 2 +- ge/graph/common/ge_call_wrapper.h | 5 +- ge/graph/common/local_context.cc | 2 +- ge/graph/common/local_context.h | 2 +- ge/graph/execute/graph_execute.cc | 2 +- ge/graph/execute/graph_execute.h | 2 +- ge/graph/label/case_label_maker.cc | 3 +- ge/graph/label/case_label_maker.h | 2 +- ge/graph/label/if_label_maker.cc | 3 +- ge/graph/label/if_label_maker.h | 2 +- ge/graph/label/label_maker.cc | 2 +- ge/graph/label/label_maker.h | 2 +- ge/graph/label/label_maker_factory.h | 2 +- .../label/partitioned_call_label_maker.cc | 3 +- ge/graph/label/partitioned_call_label_maker.h | 2 +- ge/graph/label/while_label_maker.cc | 3 +- ge/graph/label/while_label_maker.h | 2 +- 
ge/graph/load/graph_loader.cc | 2 +- ge/graph/load/graph_loader.h | 2 +- ge/graph/load/new_model_manager/aipp_utils.cc | 2 +- ge/graph/load/new_model_manager/aipp_utils.h | 2 +- .../new_model_manager/cpu_queue_schedule.cc | 2 +- .../new_model_manager/cpu_queue_schedule.h | 3 +- .../load/new_model_manager/data_dumper.cc | 2 +- ge/graph/load/new_model_manager/data_dumper.h | 10 +- .../load/new_model_manager/data_inputer.cc | 2 +- .../load/new_model_manager/data_inputer.h | 2 +- .../load/new_model_manager/davinci_model.cc | 116 ++- .../load/new_model_manager/davinci_model.h | 19 +- .../new_model_manager/davinci_model_parser.cc | 2 +- .../new_model_manager/davinci_model_parser.h | 2 +- .../load/new_model_manager/model_manager.cc | 4 +- .../load/new_model_manager/model_manager.h | 2 +- .../load/new_model_manager/model_utils.cc | 164 ++-- ge/graph/load/new_model_manager/model_utils.h | 2 +- .../task_info/end_graph_task_info.cc | 2 +- .../task_info/end_graph_task_info.h | 2 +- .../task_info/event_record_task_info.cc | 2 +- .../task_info/event_record_task_info.h | 2 +- .../task_info/event_wait_task_info.cc | 2 +- .../task_info/event_wait_task_info.h | 2 +- .../task_info/fusion_start_task_info.cc | 2 +- .../task_info/fusion_start_task_info.h | 2 +- .../task_info/fusion_stop_task_info.cc | 2 +- .../task_info/fusion_stop_task_info.h | 2 +- .../task_info/hccl_task_info.cc | 2 +- .../task_info/hccl_task_info.h | 2 +- .../task_info/kernel_ex_task_info.cc | 10 +- .../task_info/kernel_ex_task_info.h | 2 +- .../task_info/kernel_task_info.cc | 22 +- .../task_info/kernel_task_info.h | 2 +- .../task_info/label_goto_ex_task_info.cc | 2 +- .../task_info/label_goto_ex_task_info.h | 2 +- .../task_info/label_set_task_info.cc | 2 +- .../task_info/label_set_task_info.h | 2 +- .../label_switch_by_index_task_info.cc | 6 +- .../label_switch_by_index_task_info.h | 2 +- .../task_info/memcpy_addr_async_task_info.cc | 17 +- .../task_info/memcpy_addr_async_task_info.h | 2 +- 
.../task_info/memcpy_async_task_info.cc | 79 +- .../task_info/memcpy_async_task_info.h | 16 +- .../task_info/profiler_trace_task_info.cc | 2 +- .../task_info/profiler_trace_task_info.h | 2 +- .../task_info/stream_active_task_info.cc | 2 +- .../task_info/stream_active_task_info.h | 2 +- .../task_info/stream_switch_task_info.cc | 2 +- .../task_info/stream_switch_task_info.h | 2 +- .../task_info/stream_switchn_task_info.cc | 36 +- .../task_info/stream_switchn_task_info.h | 2 +- .../task_info/super_kernel/super_kernel.cc | 2 +- .../task_info/super_kernel/super_kernel.h | 2 +- .../super_kernel/super_kernel_factory.cc | 2 +- .../super_kernel/super_kernel_factory.h | 2 +- .../new_model_manager/task_info/task_info.cc | 2 +- .../new_model_manager/task_info/task_info.h | 18 +- .../task_info/task_info_factory.h | 2 +- .../new_model_manager/tbe_handle_store.cc | 3 +- .../load/new_model_manager/tbe_handle_store.h | 2 +- ge/graph/load/new_model_manager/ts_mem_mall.h | 102 +++ .../new_model_manager/zero_copy_offset.cc | 2 +- .../load/new_model_manager/zero_copy_offset.h | 2 +- ge/graph/manager/graph_manager.cc | 266 ++++-- ge/graph/manager/graph_manager.h | 51 +- ge/graph/manager/host_mem_manager.cc | 3 +- ge/graph/manager/memory_api.cc | 39 +- ge/graph/manager/util/hcom_util.h | 4 + .../manager/util/variable_accelerate_ctrl.cc | 8 + .../manager/util/variable_accelerate_ctrl.h | 3 + ge/graph/optimize/common/params.h | 2 +- ge/graph/optimize/graph_optimize.cc | 6 +- ge/graph/optimize/graph_optimize.h | 8 +- ge/graph/optimize/mem_rw_conflict_optimize.cc | 9 +- .../optimizer/allreduce_fusion_pass.cc | 2 +- .../optimizer/allreduce_fusion_pass.h | 2 +- ge/graph/optimize/summary_optimize.cc | 2 +- ge/graph/partition/dynamic_shape_partition.h | 2 +- ge/graph/partition/engine_place.cc | 2 +- ge/graph/partition/engine_place.h | 2 +- ge/graph/partition/graph_partition.cc | 7 +- ge/graph/partition/graph_partition.h | 2 +- ge/graph/partition/stage_partition.cc | 377 ++++++++ 
ge/graph/partition/stage_partition.h | 67 ++ ge/graph/passes/addn_pass.cc | 2 +- ge/graph/passes/addn_pass.h | 2 +- .../passes/aicpu_constant_folding_pass.cc | 2 +- ge/graph/passes/aicpu_constant_folding_pass.h | 2 +- ge/graph/passes/assert_pass.cc | 2 +- ge/graph/passes/assert_pass.h | 3 +- ge/graph/passes/assign_pass.cc | 2 +- ge/graph/passes/assign_pass.h | 2 +- ge/graph/passes/atomic_addr_clean_pass.cc | 2 +- ge/graph/passes/atomic_addr_clean_pass.h | 2 +- ge/graph/passes/attach_stream_label_pass.cc | 23 +- ge/graph/passes/attach_stream_label_pass.h | 8 +- ge/graph/passes/base_pass.cc | 2 +- ge/graph/passes/base_pass.h | 2 +- ge/graph/passes/bitcast_pass.cc | 2 +- ge/graph/passes/bitcast_pass.h | 2 +- ge/graph/passes/cast_remove_pass.cc | 2 +- ge/graph/passes/cast_remove_pass.h | 2 +- ge/graph/passes/cast_translate_pass.cc | 2 +- ge/graph/passes/cast_translate_pass.h | 2 +- .../common_subexpression_elimination_pass.cc | 3 +- .../common_subexpression_elimination_pass.h | 3 +- ge/graph/passes/compile_nodes_pass.cc | 3 +- ge/graph/passes/compile_nodes_pass.h | 2 +- ge/graph/passes/cond_pass.cc | 3 +- ge/graph/passes/cond_pass.h | 3 +- ge/graph/passes/cond_remove_pass.cc | 3 +- ge/graph/passes/cond_remove_pass.h | 3 +- ge/graph/passes/constant_folding_pass.cc | 2 +- ge/graph/passes/constant_folding_pass.h | 2 +- ge/graph/passes/constant_fuse_same_pass.cc | 2 +- ge/graph/passes/constant_fuse_same_pass.h | 2 +- ge/graph/passes/control_trigger_pass.cc | 2 +- ge/graph/passes/control_trigger_pass.h | 2 +- ge/graph/passes/ctrl_edge_transfer_pass.cc | 2 +- ge/graph/passes/ctrl_edge_transfer_pass.h | 3 +- ge/graph/passes/data_pass.cc | 2 +- ge/graph/passes/data_pass.h | 2 +- ge/graph/passes/dimension_adjust_pass.cc | 2 +- ge/graph/passes/dimension_adjust_pass.h | 2 +- ge/graph/passes/dimension_compute_pass.cc | 3 +- ge/graph/passes/dimension_compute_pass.h | 2 +- ge/graph/passes/dropout_pass.cc | 2 +- ge/graph/passes/dropout_pass.h | 2 +- 
.../end_of_sequence_add_control_pass.cc | 2 +- .../passes/end_of_sequence_add_control_pass.h | 2 +- ge/graph/passes/enter_pass.cc | 67 +- ge/graph/passes/enter_pass.h | 5 +- ge/graph/passes/flow_ctrl_pass.cc | 2 +- ge/graph/passes/flow_ctrl_pass.h | 2 +- ge/graph/passes/folding_pass.h | 3 +- ge/graph/passes/for_pass.cc | 2 +- ge/graph/passes/for_pass.h | 3 +- ge/graph/passes/get_original_format_pass.cc | 2 +- ge/graph/passes/get_original_format_pass.h | 2 +- ge/graph/passes/global_step_insert_pass.cc | 2 +- ge/graph/passes/global_step_insert_pass.h | 2 +- ge/graph/passes/guarantee_const_pass.cc | 2 +- ge/graph/passes/guarantee_const_pass.h | 2 +- ge/graph/passes/hccl_group_pass.cc | 2 +- ge/graph/passes/hccl_group_pass.h | 2 +- ge/graph/passes/hccl_memcpy_pass.cc | 2 +- ge/graph/passes/hccl_memcpy_pass.h | 2 +- ge/graph/passes/identity_pass.cc | 2 +- ge/graph/passes/identity_pass.h | 2 +- ge/graph/passes/infershape_pass.cc | 2 +- ge/graph/passes/infershape_pass.h | 2 +- .../input_output_connection_identify_pass.cc | 2 +- .../input_output_connection_identify_pass.h | 2 +- ge/graph/passes/isolated_op_remove_pass.cc | 2 +- ge/graph/passes/isolated_op_remove_pass.h | 2 +- ge/graph/passes/iterator_op_pass.cc | 2 +- ge/graph/passes/iterator_op_pass.h | 2 +- ge/graph/passes/link_gen_mask_nodes_pass.cc | 2 +- ge/graph/passes/link_gen_mask_nodes_pass.h | 2 +- ge/graph/passes/mark_agnostic_pass.cc | 3 +- ge/graph/passes/mark_agnostic_pass.h | 3 +- .../passes/mark_graph_unknown_status_pass.cc | 2 +- .../passes/mark_graph_unknown_status_pass.h | 2 +- ge/graph/passes/mark_same_addr_pass.cc | 2 +- ge/graph/passes/mark_same_addr_pass.h | 2 +- ge/graph/passes/memcpy_addr_async_pass.cc | 2 +- ge/graph/passes/memcpy_addr_async_pass.h | 2 +- ge/graph/passes/merge_pass.cc | 2 +- ge/graph/passes/merge_pass.h | 2 +- ge/graph/passes/merge_to_stream_merge_pass.cc | 2 +- ge/graph/passes/merge_to_stream_merge_pass.h | 2 +- ge/graph/passes/multi_batch_clone_pass.cc | 2 +- 
ge/graph/passes/multi_batch_clone_pass.h | 2 +- ge/graph/passes/multi_batch_pass.cc | 2 +- ge/graph/passes/multi_batch_pass.h | 2 +- ge/graph/passes/net_output_pass.cc | 2 +- ge/graph/passes/net_output_pass.h | 2 +- ge/graph/passes/next_iteration_pass.cc | 2 +- ge/graph/passes/next_iteration_pass.h | 2 +- ge/graph/passes/no_use_reshape_remove_pass.cc | 2 +- ge/graph/passes/no_use_reshape_remove_pass.h | 2 +- .../passes/parallel_concat_start_op_pass.cc | 2 +- .../passes/parallel_concat_start_op_pass.h | 2 +- ge/graph/passes/pass_manager.cc | 2 +- ge/graph/passes/pass_utils.cc | 2 +- ge/graph/passes/pass_utils.h | 2 +- ge/graph/passes/permute_pass.cc | 2 +- ge/graph/passes/permute_pass.h | 2 +- .../passes/placeholder_with_default_pass.cc | 2 +- .../passes/placeholder_with_default_pass.h | 2 +- ge/graph/passes/prevent_gradient_pass.cc | 2 +- ge/graph/passes/prevent_gradient_pass.h | 2 +- ge/graph/passes/print_op_pass.cc | 2 +- ge/graph/passes/print_op_pass.h | 2 +- ge/graph/passes/prune_pass.cc | 2 +- ge/graph/passes/prune_pass.h | 2 +- .../passes/ref_identity_delete_op_pass.cc | 10 +- ge/graph/passes/ref_identity_delete_op_pass.h | 10 +- ge/graph/passes/remove_nodes_pass.cc | 3 +- ge/graph/passes/remove_nodes_pass.h | 3 +- ge/graph/passes/replace_transshape_pass.cc | 2 +- ge/graph/passes/replace_transshape_pass.h | 2 +- .../passes/replace_with_empty_const_pass.cc | 2 +- .../passes/replace_with_empty_const_pass.h | 2 +- ge/graph/passes/reshape_recovery_pass.cc | 3 +- ge/graph/passes/reshape_recovery_pass.h | 3 +- ge/graph/passes/reshape_remove_pass.cc | 2 +- ge/graph/passes/reshape_remove_pass.h | 2 +- .../passes/resource_pair_add_control_pass.cc | 2 +- .../passes/resource_pair_add_control_pass.h | 2 +- .../resource_pair_remove_control_pass.cc | 2 +- .../resource_pair_remove_control_pass.h | 2 +- .../same_transdata_breadth_fusion_pass.cc | 2 +- .../same_transdata_breadth_fusion_pass.h | 2 +- ge/graph/passes/save_pass.cc | 2 +- ge/graph/passes/save_pass.h | 2 +- 
.../passes/set_input_output_offset_pass.cc | 2 +- .../passes/set_input_output_offset_pass.h | 3 +- .../passes/shape_operate_op_remove_pass.cc | 2 +- .../passes/shape_operate_op_remove_pass.h | 2 +- ge/graph/passes/snapshot_pass.cc | 2 +- ge/graph/passes/snapshot_pass.h | 2 +- ge/graph/passes/stop_gradient_pass.cc | 2 +- ge/graph/passes/stop_gradient_pass.h | 2 +- .../passes/subexpression_migration_pass.cc | 3 +- .../passes/subexpression_migration_pass.h | 2 +- .../passes/subgraph_const_migration_pass.cc | 11 +- .../passes/subgraph_const_migration_pass.h | 2 +- ge/graph/passes/subgraph_pass.cc | 2 +- ge/graph/passes/subgraph_pass.h | 2 +- ge/graph/passes/switch_data_edges_bypass.cc | 2 +- ge/graph/passes/switch_data_edges_bypass.h | 3 +- .../passes/switch_dead_branch_elimination.cc | 2 +- .../passes/switch_dead_branch_elimination.h | 3 +- ge/graph/passes/switch_logic_remove_pass.cc | 2 +- ge/graph/passes/switch_logic_remove_pass.h | 3 +- .../passes/switch_to_stream_switch_pass.cc | 2 +- .../passes/switch_to_stream_switch_pass.h | 2 +- .../passes/transop_breadth_fusion_pass.cc | 2 +- ge/graph/passes/transop_breadth_fusion_pass.h | 2 +- ge/graph/passes/transop_depth_fusion_pass.cc | 2 +- ge/graph/passes/transop_depth_fusion_pass.h | 2 +- .../transop_nearby_allreduce_fusion_pass.cc | 2 +- .../transop_nearby_allreduce_fusion_pass.h | 2 +- .../transop_symmetry_elimination_pass.cc | 2 +- .../transop_symmetry_elimination_pass.h | 3 +- .../transop_without_reshape_fusion_pass.cc | 3 +- .../transop_without_reshape_fusion_pass.h | 3 +- ge/graph/passes/transpose_transdata_pass.cc | 2 +- ge/graph/passes/transpose_transdata_pass.h | 2 +- ge/graph/passes/unused_args_clean_pass.cc | 3 +- ge/graph/passes/unused_args_clean_pass.h | 3 +- ge/graph/passes/unused_const_pass.cc | 2 +- ge/graph/passes/unused_const_pass.h | 2 +- ge/graph/passes/unused_op_remove_pass.cc | 2 +- ge/graph/passes/unused_op_remove_pass.h | 2 +- ge/graph/passes/var_is_initialized_op_pass.cc | 2 +- 
ge/graph/passes/var_is_initialized_op_pass.h | 2 +- ge/graph/passes/variable_format_pass.cc | 2 +- ge/graph/passes/variable_format_pass.h | 2 +- ge/graph/passes/variable_op_pass.cc | 2 +- ge/graph/passes/variable_op_pass.h | 2 +- ge/graph/passes/variable_op_pass_bak.cc | 812 ++++++++++++++++++ ge/graph/passes/variable_op_pass_bak.h | 104 +++ ge/graph/passes/variable_prepare_op_pass.cc | 2 +- ge/graph/passes/variable_prepare_op_pass.h | 2 +- .../passes/variable_ref_delete_op_pass.cc | 2 +- ge/graph/passes/variable_ref_delete_op_pass.h | 2 +- ...ble_ref_useless_control_out_delete_pass.cc | 3 +- ...able_ref_useless_control_out_delete_pass.h | 3 +- ge/graph/preprocess/graph_preprocess.cc | 2 +- ge/graph/preprocess/graph_preprocess.h | 2 +- .../preprocess/insert_op/base_insert_op.h | 2 +- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 14 +- ge/graph/preprocess/insert_op/ge_aipp_op.h | 2 +- .../insert_op/util_insert_aipp_op.cc | 2 +- .../insert_op/util_insert_aipp_op.h | 2 +- ge/graph/preprocess/multi_batch_copy_graph.cc | 66 +- ge/graph/preprocess/multi_batch_copy_graph.h | 5 +- ge/graph/preprocess/multi_batch_options.cc | 2 +- ge/graph/preprocess/multi_batch_options.h | 2 +- ge/host_cpu_engine/common/constant/constant.h | 2 +- ge/host_cpu_engine/engine/host_cpu_engine.cc | 2 +- ge/host_cpu_engine/engine/host_cpu_engine.h | 2 +- ge/host_cpu_engine/module.mk | 22 +- .../host_cpu_ops_kernel_builder.cc | 2 +- .../host_cpu_ops_kernel_builder.h | 2 +- .../host_cpu_ops_kernel_info.cc | 2 +- .../host_cpu_ops_kernel_info.h | 2 +- .../ops_kernel_store/op/host_op.cc | 2 +- .../ops_kernel_store/op/host_op.h | 2 +- ge/host_cpu_engine/ops_kernel_store/op/op.h | 2 +- .../ops_kernel_store/op/op_factory.cc | 2 +- .../ops_kernel_store/op/op_factory.h | 2 +- ge/host_kernels/add_kernel.cc | 2 +- ge/host_kernels/add_kernel.h | 2 +- ge/host_kernels/broadcast_args_kernel.cc | 2 +- ge/host_kernels/broadcast_args_kernel.h | 2 +- .../broadcast_gradient_args_kernel.cc | 3 +- 
.../broadcast_gradient_args_kernel.h | 2 +- ge/host_kernels/cast_kernel.cc | 2 +- ge/host_kernels/cast_kernel.h | 2 +- ge/host_kernels/concat_offset_kernel.cc | 2 +- ge/host_kernels/concat_offset_kernel.h | 2 +- ge/host_kernels/concat_v2_kernel.cc | 2 +- ge/host_kernels/concat_v2_kernel.h | 2 +- ge/host_kernels/dynamic_stitch_kernel.cc | 2 +- ge/host_kernels/dynamic_stitch_kernel.h | 2 +- ge/host_kernels/empty_kernel.cc | 2 +- ge/host_kernels/empty_kernel.h | 2 +- ge/host_kernels/expanddims_kernel.cc | 2 +- ge/host_kernels/expanddims_kernel.h | 2 +- ge/host_kernels/fill_kernel.cc | 2 +- ge/host_kernels/fill_kernel.h | 2 +- ge/host_kernels/floordiv_kernel.cc | 2 +- ge/host_kernels/floordiv_kernel.h | 2 +- ge/host_kernels/floormod_kernel.cc | 2 +- ge/host_kernels/floormod_kernel.h | 2 +- ge/host_kernels/gather_v2_kernel.cc | 2 +- ge/host_kernels/gather_v2_kernel.h | 2 +- ge/host_kernels/greater_kernel.cc | 2 +- ge/host_kernels/greater_kernel.h | 2 +- ge/host_kernels/identity_kernel.cc | 4 +- ge/host_kernels/identity_kernel.h | 4 +- ge/host_kernels/kernel_utils.cc | 2 +- ge/host_kernels/kernel_utils.h | 2 +- ge/host_kernels/maximum_kernel.cc | 2 +- ge/host_kernels/maximum_kernel.h | 2 +- ge/host_kernels/mul_kernel.cc | 2 +- ge/host_kernels/mul_kernel.h | 2 +- ge/host_kernels/pack_kernel.cc | 3 +- ge/host_kernels/pack_kernel.h | 3 +- ge/host_kernels/permute_kernel.cc | 2 +- ge/host_kernels/permute_kernel.h | 2 +- ge/host_kernels/range_kernel.cc | 2 +- ge/host_kernels/range_kernel.h | 2 +- ge/host_kernels/rank_kernel.cc | 2 +- ge/host_kernels/rank_kernel.h | 2 +- ge/host_kernels/reduce_prod_kernel.cc | 2 +- ge/host_kernels/reduce_prod_kernel.h | 2 +- ge/host_kernels/reformat_kernel.cc | 2 +- ge/host_kernels/reformat_kernel.h | 2 +- ge/host_kernels/reshape_kernel.cc | 2 +- ge/host_kernels/reshape_kernel.h | 2 +- ge/host_kernels/rsqrt_kernel.cc | 5 +- ge/host_kernels/rsqrt_kernel.h | 2 +- ge/host_kernels/shape_kernel.cc | 2 +- ge/host_kernels/shape_kernel.h | 2 +- 
ge/host_kernels/shape_n_kernel.cc | 2 +- ge/host_kernels/shape_n_kernel.h | 2 +- ge/host_kernels/size_kernel.cc | 2 +- ge/host_kernels/size_kernel.h | 2 +- ge/host_kernels/slice_d_kernel.cc | 2 +- ge/host_kernels/slice_d_kernel.h | 2 +- ge/host_kernels/slice_kernel.cc | 2 +- ge/host_kernels/slice_kernel.h | 2 +- ge/host_kernels/squeeze_kernel.cc | 2 +- ge/host_kernels/squeeze_kernel.h | 2 +- ge/host_kernels/ssd_prior_box_kernel.cc | 2 +- ge/host_kernels/ssd_prior_box_kernel.h | 2 +- ge/host_kernels/strided_slice_kernel.cc | 112 ++- ge/host_kernels/strided_slice_kernel.h | 8 +- ge/host_kernels/sub_kernel.cc | 2 +- ge/host_kernels/sub_kernel.h | 2 +- ge/host_kernels/transdata_kernel.cc | 2 +- ge/host_kernels/transdata_kernel.h | 2 +- ge/host_kernels/transpose_kernel.cc | 2 +- ge/host_kernels/transpose_kernel.h | 2 +- ge/host_kernels/unpack_kernel.cc | 2 +- ge/host_kernels/unpack_kernel.h | 2 +- ge/host_kernels/unsqueeze_kernel.cc | 2 +- ge/host_kernels/unsqueeze_kernel.h | 2 +- .../executor/hybrid_model_async_executor.cc | 2 + ge/hybrid/executor/subgraph_context.cc | 2 +- ge/hybrid/executor/subgraph_context.h | 2 +- ge/hybrid/executor/subgraph_executor.cc | 3 + ge/hybrid/model/hybrid_model.h | 6 +- ge/hybrid/model/node_item.cc | 4 +- .../compiledsubgraph/known_node_executor.cc | 2 +- .../compiledsubgraph/known_node_executor.h | 2 +- .../controlop/control_op_executor.cc | 1 + .../node_executor/hccl/hccl_node_executor.cc | 2 +- .../node_executor/hccl/hccl_node_executor.h | 2 +- .../host_cpu/kernel/assign_kernel.cc | 2 +- .../host_cpu/kernel/assign_kernel.h | 2 +- .../node_executor/host_cpu/kernel/kernel.h | 2 +- .../host_cpu/kernel/no_op_kernel.cc | 2 +- .../host_cpu/kernel/no_op_kernel.h | 2 +- .../host_cpu/kernel/random_uniform_kernel.cc | 2 +- .../host_cpu/kernel/random_uniform_kernel.h | 2 +- .../host_cpu/kernel/variable_kernel.cc | 2 +- .../host_cpu/kernel/variable_kernel.h | 2 +- .../node_executor/host_cpu/kernel_factory.cc | 2 +- 
.../node_executor/host_cpu/kernel_factory.h | 2 +- ge/inc/graph_pass.h | 186 ++-- ge/init/gelib.cc | 14 +- ge/init/gelib.h | 4 +- ge/ir_build/atc_ir_common.cc | 5 +- ge/ir_build/atc_ir_common.h | 3 +- ge/ir_build/ge_ir_build.cc | 12 +- ge/model/ge_model.cc | 1 + ge/model/ge_model.h | 2 +- ge/model/ge_root_model.cc | 2 +- ge/model/ge_root_model.h | 3 +- .../ops_kernel_builder_manager.cc | 165 ++++ .../ops_kernel_builder_manager.h | 57 ++ ge/opskernel_manager/ops_kernel_manager.cc | 10 +- ge/session/inner_session.cc | 21 +- ge/session/inner_session.h | 4 +- ge/session/omg.cc | 166 ++-- ge/session/readme.txt | 3 - ge/session/session_manager.cc | 2 +- ge/session/session_manager.h | 2 +- ge/single_op/single_op.cc | 2 +- ge/single_op/task/aicpu_task_builder.cc | 4 +- ge/single_op/task/aicpu_task_builder.h | 4 +- ge/single_op/task/op_task.cc | 162 ++-- ge/single_op/task/op_task.h | 28 +- ge/stub/gen_stubapi.py | 2 +- inc/common/blocking_queue.h | 141 --- inc/common/dynamic_aipp.h | 104 --- inc/common/npu_error_define.h | 94 -- inc/common/opskernel/ge_task_info.h | 74 -- inc/common/opskernel/ops_kernel_info_store.h | 88 -- inc/common/opskernel/ops_kernel_info_types.h | 66 -- inc/common/optimizer/graph_optimizer.h | 71 -- .../ai_core/common/aicore_util_attr_define.h | 41 - .../util/ai_core/common/aicore_util_types.h | 118 --- inc/common/util/ai_core/common/graph_comm.h | 107 --- .../util/ai_core/common/scope_allocator.h | 43 - .../param_calculate/tensorsize_calculator.h | 45 - inc/common/util/compress/compress.h | 37 - inc/common/util/error_manager/error_manager.h | 94 -- inc/common/util/platform_info.h | 101 --- inc/common/util/platform_info_def.h | 140 --- inc/external/ge/ge_api_error_codes.h | 2 +- inc/external/ge/ge_api_types.h | 125 +-- inc/external/ge/ge_ir_build.h | 35 +- inc/external/ge/ge_prof.h | 61 +- inc/framework/common/debug/log.h | 28 +- inc/framework/common/ge_inner_error_codes.h | 18 +- inc/framework/common/ge_types.h | 20 +- 
inc/framework/common/helper/model_helper.h | 16 +- inc/framework/common/helper/om_file_helper.h | 6 +- inc/framework/common/op/attr_value_util.h | 3 +- inc/framework/common/op/ge_op_utils.h | 3 +- inc/framework/common/scope_guard.h | 5 +- inc/framework/common/string_util.h | 6 +- inc/framework/common/types.h | 9 +- inc/framework/common/util.h | 122 +-- inc/framework/engine/dnnengine.h | 1 + inc/framework/executor/ge_executor.h | 24 +- inc/framework/generator/ge_generator.h | 15 +- inc/framework/memory/memory_api.h | 14 + inc/framework/memory/memory_assigner.h | 2 +- inc/framework/omg/omg.h | 3 - inc/framework/omg/omg_inner_types.h | 8 +- third_party/fwkacllib/inc/hccl/base.h | 10 + third_party/fwkacllib/inc/hccl/hccl_types.h | 2 + third_party/fwkacllib/inc/hccl/hcom.h | 9 + third_party/fwkacllib/inc/mmpa/mmpa_api.h | 4 +- third_party/fwkacllib/inc/ops/aipp.h | 8 +- third_party/fwkacllib/inc/ops/all_ops.h | 6 +- third_party/fwkacllib/inc/ops/array_ops.h | 39 +- third_party/fwkacllib/inc/ops/audio_ops.h | 6 +- third_party/fwkacllib/inc/ops/batch_ops.h | 6 +- third_party/fwkacllib/inc/ops/bitwise_ops.h | 6 +- .../fwkacllib/inc/ops/boosted_trees_ops.h | 6 +- .../inc/ops/candidate_sampling_ops.h | 6 +- third_party/fwkacllib/inc/ops/condtake_ops.h | 6 +- .../fwkacllib/inc/ops/control_flow_ops.h | 6 +- third_party/fwkacllib/inc/ops/ctc_ops.h | 6 +- third_party/fwkacllib/inc/ops/data_flow_ops.h | 6 +- .../inc/ops/elewise_calculation_ops.h | 61 +- .../fwkacllib/inc/ops/functional_ops.h | 6 +- third_party/fwkacllib/inc/ops/get_data_ops.h | 6 +- third_party/fwkacllib/inc/ops/hcom_ops.h | 43 +- third_party/fwkacllib/inc/ops/hvd_ops.h | 6 +- third_party/fwkacllib/inc/ops/image_ops.h | 6 +- third_party/fwkacllib/inc/ops/internal_ops.h | 9 +- third_party/fwkacllib/inc/ops/linalg_ops.h | 6 +- third_party/fwkacllib/inc/ops/logging_ops.h | 6 +- third_party/fwkacllib/inc/ops/lookup_ops.h | 6 +- third_party/fwkacllib/inc/ops/math_ops.h | 23 +- .../inc/ops/matrix_calculation_ops.h | 40 
+- third_party/fwkacllib/inc/ops/max_pool_v3.h | 77 ++ .../fwkacllib/inc/ops/max_pool_v3_grad.h | 80 ++ .../fwkacllib/inc/ops/nn_batch_norm_ops.h | 6 +- .../fwkacllib/inc/ops/nn_calculation_ops.h | 322 +++++-- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 122 ++- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 65 +- third_party/fwkacllib/inc/ops/nn_ops.h | 6 +- .../fwkacllib/inc/ops/nn_pooling_ops.h | 148 +++- .../fwkacllib/inc/ops/nn_training_ops.h | 14 +- third_party/fwkacllib/inc/ops/no_op.h | 6 +- .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 6 +- .../fwkacllib/inc/ops/npu_loss_scale_ops.h | 25 +- third_party/fwkacllib/inc/ops/outfeed_ops.h | 6 +- third_party/fwkacllib/inc/ops/pad_ops.h | 79 +- third_party/fwkacllib/inc/ops/parsing_ops.h | 6 +- third_party/fwkacllib/inc/ops/quantize_ops.h | 6 +- .../fwkacllib/inc/ops/ragged_array_ops.h | 6 +- .../fwkacllib/inc/ops/ragged_conversion_ops.h | 6 +- .../fwkacllib/inc/ops/ragged_math_ops.h | 6 +- third_party/fwkacllib/inc/ops/random_ops.h | 27 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 12 +- .../fwkacllib/inc/ops/resource_variable_ops.h | 60 +- third_party/fwkacllib/inc/ops/rnn.h | 82 +- third_party/fwkacllib/inc/ops/rpn_ops.h | 6 +- third_party/fwkacllib/inc/ops/save_ops.h | 6 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 6 +- third_party/fwkacllib/inc/ops/selection_ops.h | 24 +- third_party/fwkacllib/inc/ops/set_ops.h | 6 +- third_party/fwkacllib/inc/ops/sparse_ops.h | 6 +- third_party/fwkacllib/inc/ops/spectral_ops.h | 6 +- .../fwkacllib/inc/ops/split_combination_ops.h | 6 +- third_party/fwkacllib/inc/ops/state_ops.h | 6 +- .../fwkacllib/inc/ops/stateful_random_ops.h | 6 +- .../fwkacllib/inc/ops/stateless_random_ops.h | 6 +- third_party/fwkacllib/inc/ops/string_ops.h | 6 +- third_party/fwkacllib/inc/ops/swap_co_ops.h | 6 +- .../fwkacllib/inc/ops/transformation_ops.h | 14 +- .../fwkacllib/inc/ops/warp_perspective_ops.h | 9 +- .../inc/register/op_kernel_registry.h | 1 - .../register/ops_kernel_builder_registry.h 
| 62 ++ third_party/fwkacllib/inc/runtime/base.h | 15 +- third_party/fwkacllib/inc/runtime/config.h | 4 +- third_party/fwkacllib/inc/runtime/context.h | 11 +- third_party/fwkacllib/inc/runtime/dev.h | 21 +- .../fwkacllib/inc/runtime/dvfsprofile.h | 4 +- third_party/fwkacllib/inc/runtime/event.h | 4 +- third_party/fwkacllib/inc/runtime/kernel.h | 4 +- third_party/fwkacllib/inc/runtime/mem.h | 59 +- third_party/fwkacllib/inc/runtime/rt_model.h | 4 +- third_party/fwkacllib/inc/runtime/stream.h | 4 +- third_party/fwkacllib/inc/tdt/status.h | 4 + .../fwkacllib/inc/tdt/tdt_host_interface.h | 87 ++ .../inc/toolchain/adx_datadump_server.h | 37 +- .../fwkacllib/inc/toolchain/prof_acl_api.h | 9 + third_party/patch/securec/securec.patch001 | 4 +- 730 files changed, 6452 insertions(+), 3663 deletions(-) create mode 100644 ge/ge_runtime/task/label_goto_task.cc rename inc/common/util/compress/compress_weight.h => ge/ge_runtime/task/label_goto_task.h (50%) create mode 100644 ge/ge_runtime/task/label_set_task.cc rename inc/common/optimizer/graph_optimizer_types.h => ge/ge_runtime/task/label_set_task.h (53%) create mode 100644 ge/ge_runtime/task/label_switch_task.cc create mode 100644 ge/ge_runtime/task/label_switch_task.h create mode 100644 ge/graph/load/new_model_manager/ts_mem_mall.h create mode 100644 ge/graph/partition/stage_partition.cc create mode 100644 ge/graph/partition/stage_partition.h create mode 100644 ge/graph/passes/variable_op_pass_bak.cc create mode 100644 ge/graph/passes/variable_op_pass_bak.h create mode 100644 ge/opskernel_manager/ops_kernel_builder_manager.cc create mode 100644 ge/opskernel_manager/ops_kernel_builder_manager.h delete mode 100644 ge/session/readme.txt delete mode 100644 inc/common/blocking_queue.h delete mode 100644 inc/common/dynamic_aipp.h delete mode 100644 inc/common/npu_error_define.h delete mode 100644 inc/common/opskernel/ge_task_info.h delete mode 100644 inc/common/opskernel/ops_kernel_info_store.h delete mode 100644 
inc/common/opskernel/ops_kernel_info_types.h delete mode 100644 inc/common/optimizer/graph_optimizer.h delete mode 100644 inc/common/util/ai_core/common/aicore_util_attr_define.h delete mode 100644 inc/common/util/ai_core/common/aicore_util_types.h delete mode 100644 inc/common/util/ai_core/common/graph_comm.h delete mode 100644 inc/common/util/ai_core/common/scope_allocator.h delete mode 100644 inc/common/util/ai_core/param_calculate/tensorsize_calculator.h delete mode 100644 inc/common/util/compress/compress.h delete mode 100644 inc/common/util/error_manager/error_manager.h delete mode 100644 inc/common/util/platform_info.h delete mode 100644 inc/common/util/platform_info_def.h create mode 100644 third_party/fwkacllib/inc/ops/max_pool_v3.h create mode 100644 third_party/fwkacllib/inc/ops/max_pool_v3_grad.h create mode 100644 third_party/fwkacllib/inc/register/ops_kernel_builder_registry.h rename inc/common/util/ai_core/param_calculate/aicore_param_calculator.h => third_party/fwkacllib/inc/toolchain/adx_datadump_server.h (51%) diff --git a/ge/analyzer/analyzer.cc b/ge/analyzer/analyzer.cc index 9064da28..972aba7d 100755 --- a/ge/analyzer/analyzer.cc +++ b/ge/analyzer/analyzer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -77,9 +77,8 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { std::lock_guard lg(mutex_); auto iter = graph_infos_.find(session_id); if (iter == graph_infos_.end()) { - auto p = new(std::nothrow) GraphInfo(); - GE_CHECK_NOTNULL(p); - std::shared_ptr graph_info(p); + std::shared_ptr graph_info(new(std::nothrow) GraphInfo()); + GE_CHECK_NOTNULL(graph_info); std::map> graph_map; graph_map[graph_id] = graph_info; graph_info->session_id = session_id; @@ -88,9 +87,8 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { } else { auto iter1 = (iter->second).find(graph_id); if (iter1 == (iter->second).end()) { - auto p = new(std::nothrow) GraphInfo(); - GE_CHECK_NOTNULL(p); - std::shared_ptr graph_info(p); + std::shared_ptr graph_info(new(std::nothrow) GraphInfo()); + GE_CHECK_NOTNULL(graph_info); graph_info->session_id = session_id; graph_info->graph_id = graph_id; (iter->second).insert({graph_id, graph_info}); diff --git a/ge/analyzer/analyzer.h b/ge/analyzer/analyzer.h index fd89b150..69b068cb 100755 --- a/ge/analyzer/analyzer.h +++ b/ge/analyzer/analyzer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 68c9fccd..522985fa 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -16,6 +16,7 @@ #include "ge/ge_api.h" #include +#include #include "common/debug/log.h" #include "framework/common/debug/ge_log.h" #include "common/ge/datatype_util.h" @@ -163,6 +164,9 @@ Status GEFinalize() { g_ge_initialized = false; } + // to avoid memory fragment, use malloc_trim to back free stack to system + malloc_trim(0); + GELOGT(TRACE_STOP, "GEFinalize finished"); return ret; } diff --git a/ge/client/ge_prof.cc b/ge/client/ge_prof.cc index f7fef4e9..97646f2a 100644 --- a/ge/client/ge_prof.cc +++ b/ge/client/ge_prof.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -324,10 +324,17 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { return GE_PROF_NOT_INIT; } - Status ret = ProfStopProfiling(&profiler_config->config); - if (ret != SUCCESS) { - GELOGE(ret, "Stop profiling failed, prof result = %d", ret); - return ret; + for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { + uint64_t data_type_config; + Status status = ProfGetDataTypeConfig(profiler_config->config.devIdList[i], data_type_config); + if (status != SUCCESS) { + GELOGE(status, "Prof get data type config failed, prof result = %d", status); + return status; + } + if (data_type_config != profiler_config->config.dataTypeConfig) { + GELOGE(FAILED, "data type config verify failed"); + return FAILED; + } } std::vector prof_params; @@ -344,12 +351,18 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { command.module_index = profiler_config->config.dataTypeConfig; GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str(), 
command.module_index); - ret = graph_loader.CommandHandle(command); + Status ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); return FAILED; } + ret = ProfStopProfiling(&profiler_config->config); + if (ret != SUCCESS) { + GELOGE(ret, "Stop profiling failed, prof result = %d", ret); + return ret; + } + GELOGI("Successfully execute GraphProfStopProfiling."); return SUCCESS; } diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc index 60d99c0b..e57a1eb2 100755 --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/auth/file_saver.h b/ge/common/auth/file_saver.h index 79e2126e..dc6b557f 100644 --- a/ge/common/auth/file_saver.h +++ b/ge/common/auth/file_saver.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/base64.h b/ge/common/base64.h index fb6c1870..0cb57e74 100644 --- a/ge/common/base64.h +++ b/ge/common/base64.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/context/ctx.cc b/ge/common/context/ctx.cc index 9fe2f8c7..f6ae364d 100755 --- a/ge/common/context/ctx.cc +++ b/ge/common/context/ctx.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/cust_aicpu_kernel_store.cc b/ge/common/cust_aicpu_kernel_store.cc index 86881b0e..c1f6157d 100755 --- a/ge/common/cust_aicpu_kernel_store.cc +++ b/ge/common/cust_aicpu_kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/cust_aicpu_kernel_store.h b/ge/common/cust_aicpu_kernel_store.h index 033a636b..6dff0435 100755 --- a/ge/common/cust_aicpu_kernel_store.h +++ b/ge/common/cust_aicpu_kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/debug/memory_dumper.cc b/ge/common/debug/memory_dumper.cc index d2b8d674..1a7d9db8 100644 --- a/ge/common/debug/memory_dumper.cc +++ b/ge/common/debug/memory_dumper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/debug/memory_dumper.h b/ge/common/debug/memory_dumper.h index a71f86f4..4995f5f7 100755 --- a/ge/common/debug/memory_dumper.h +++ b/ge/common/debug/memory_dumper.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc index 17019c5a..fbf9afe7 100644 --- a/ge/common/dump/dump_manager.cc +++ b/ge/common/dump/dump_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_manager.h b/ge/common/dump/dump_manager.h index 53a643f9..dbc89cc8 100644 --- a/ge/common/dump/dump_manager.h +++ b/ge/common/dump/dump_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc index ca2dec98..e92ada05 100755 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -172,18 +172,18 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { return RT_FAILED; } - constexpr int32_t ioAddrNum = 2; - constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t); - char args[argsSize] = {0}; - auto paramHead = reinterpret_cast(args); - paramHead->length = argsSize; - paramHead->ioAddrNum = ioAddrNum; - auto ioAddr = reinterpret_cast(args + sizeof(aicpu::AicpuParamHead)); - ioAddr[0] = reinterpret_cast(proto_dev_mem_); - ioAddr[1] = reinterpret_cast(proto_size_dev_mem_); + constexpr int32_t io_addr_num = 2; + constexpr uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addr_num * sizeof(uint64_t); + char args[args_size] = {0}; + auto param_head = reinterpret_cast(args); + param_head->length = args_size; + param_head->ioAddrNum = io_addr_num; + auto io_addr = reinterpret_cast(args + sizeof(aicpu::AicpuParamHead)); + io_addr[0] = reinterpret_cast(proto_dev_mem_); + io_addr[1] = reinterpret_cast(proto_size_dev_mem_); rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, 1, // blockDim default 1 - args, argsSize, + args, args_size, nullptr, // no need smDesc stream_); if (rt_ret != RT_ERROR_NONE) { diff --git a/ge/common/dump/dump_op.h b/ge/common/dump/dump_op.h index d59962e6..b3042245 100755 --- a/ge/common/dump/dump_op.h +++ b/ge/common/dump/dump_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/dump/dump_properties.cc b/ge/common/dump/dump_properties.cc index a4540367..360cfcd8 100644 --- a/ge/common/dump/dump_properties.cc +++ b/ge/common/dump/dump_properties.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_properties.h b/ge/common/dump/dump_properties.h index 682d2d08..689611a0 100644 --- a/ge/common/dump/dump_properties.h +++ b/ge/common/dump/dump_properties.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_server.cc b/ge/common/dump/dump_server.cc index a3dc5804..1f95dc3a 100644 --- a/ge/common/dump/dump_server.cc +++ b/ge/common/dump/dump_server.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/fmk_error_codes.cc b/ge/common/fmk_error_codes.cc index ddb8089d..3ad8503a 100755 --- a/ge/common/fmk_error_codes.cc +++ b/ge/common/fmk_error_codes.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/datatype_transfer.cc b/ge/common/formats/format_transfers/datatype_transfer.cc index 725eed98..1a6ef167 100644 --- a/ge/common/formats/format_transfers/datatype_transfer.cc +++ b/ge/common/formats/format_transfers/datatype_transfer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/datatype_transfer.h b/ge/common/formats/format_transfers/datatype_transfer.h index 22313e90..4d93fd6c 100755 --- a/ge/common/formats/format_transfers/datatype_transfer.h +++ b/ge/common/formats/format_transfers/datatype_transfer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc index 12d13e44..88dd2c5d 100644 --- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc +++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h index 8ff704eb..d2156018 100644 --- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h +++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc index 4060a3b2..76d8696a 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h" #include diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h index 6a31a746..41581dec 100755 --- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h +++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWCN_FRACTAL_Z_3D_H_ #define GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWCN_FRACTAL_Z_3D_H_ diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc index 457469c7..9de2e3a0 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h" #include diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h index 728cfbdc..1c4986b8 100755 --- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h +++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWNC_FRACTAL_Z_3D_TRANSPOSE_H_ #define GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWNC_FRACTAL_Z_3D_TRANSPOSE_H_ diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index cb4de6b5..bda027d4 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.h b/ge/common/formats/format_transfers/format_transfer_fractal_nz.h index 68abdbc8..49e82884 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc index 0e941486..030e61f3 100644 --- a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.h b/ge/common/formats/format_transfers/format_transfer_fractal_z.h index d640eb60..5ae83303 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_z.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc index 009bce2b..4ff381b0 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.h b/ge/common/formats/format_transfers/format_transfer_fractal_zz.h index c1898e5b..93f40920 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc index 2076f6f9..49c74bca 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h index 4cc393d3..a7efbfcb 100644 --- a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc index 042559ca..6f1600a1 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h index 9b22a7e0..af2cedd0 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc index 98bd1807..755a947b 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h index efeb2506..41654304 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc index d2f8cf30..ef1ef4be 100755 --- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h index 079cb449..81d7358e 100644 --- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h +++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc index 31744d86..cf6e2835 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h index 453c843e..6d599933 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc index ee3f9917..327c466b 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h index 8b456019..8ff60bb1 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc index 6f065fc5..2039da47 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h index d9a3490c..4a0fce95 100755 --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc index ebc15da7..cbb01798 100755 --- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h index dd31574d..c6269579 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h +++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc index 3ae7a924..aeeb5dc4 100755 --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h index 47c0d50e..fb190f54 100755 --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc index 19f54040..4d034ecf 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.h b/ge/common/formats/format_transfers/format_transfer_transpose.h index 7fa19ff0..370f4368 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.h +++ b/ge/common/formats/format_transfers/format_transfer_transpose.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/formats.cc b/ge/common/formats/formats.cc index 697e16ad..1456b58a 100755 --- a/ge/common/formats/formats.cc +++ b/ge/common/formats/formats.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/formats.h b/ge/common/formats/formats.h index 52ae84ad..ed8a74b9 100644 --- a/ge/common/formats/formats.h +++ b/ge/common/formats/formats.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/utils/formats_definitions.h b/ge/common/formats/utils/formats_definitions.h index 7f873f1b..48257664 100755 --- a/ge/common/formats/utils/formats_definitions.h +++ b/ge/common/formats/utils/formats_definitions.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/utils/formats_trans_utils.cc b/ge/common/formats/utils/formats_trans_utils.cc index e184a866..23da0f74 100755 --- a/ge/common/formats/utils/formats_trans_utils.cc +++ b/ge/common/formats/utils/formats_trans_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/utils/formats_trans_utils.h b/ge/common/formats/utils/formats_trans_utils.h index a480b814..8b6f0604 100755 --- a/ge/common/formats/utils/formats_trans_utils.h +++ b/ge/common/formats/utils/formats_trans_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/fp16_t.cc b/ge/common/fp16_t.cc index 2f94323d..7b111e63 100755 --- a/ge/common/fp16_t.cc +++ b/ge/common/fp16_t.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/fp16_t.h b/ge/common/fp16_t.h index 0c5cd17b..0fda2cd2 100755 --- a/ge/common/fp16_t.h +++ b/ge/common/fp16_t.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/datatype_util.cc b/ge/common/ge/datatype_util.cc index d99f13c1..dc74a8a1 100755 --- a/ge/common/ge/datatype_util.cc +++ b/ge/common/ge/datatype_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ std::map> g_reverse_translatable_data_ty {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; -static const std::map g_dump_data_type_map = { +std::map g_dump_data_type_map = { // key:ge datatype,value:proto datatype {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, {ge::DT_FLOAT, ge::proto::DT_FLOAT}, diff --git a/ge/common/ge/datatype_util.h b/ge/common/ge/datatype_util.h index e42b25a7..170c54cd 100644 --- a/ge/common/ge/datatype_util.h +++ b/ge/common/ge/datatype_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/ge/ge_util.h b/ge/common/ge/ge_util.h index 52e7c370..c6319bd3 100644 --- a/ge/common/ge/ge_util.h +++ b/ge/common/ge/ge_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/op_tiling_manager.cc b/ge/common/ge/op_tiling_manager.cc index 251634e2..38bb7955 100644 --- a/ge/common/ge/op_tiling_manager.cc +++ b/ge/common/ge/op_tiling_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/op_tiling_manager.h b/ge/common/ge/op_tiling_manager.h index d4e7f34e..47279e72 100644 --- a/ge/common/ge/op_tiling_manager.h +++ b/ge/common/ge/op_tiling_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc index 57d51223..1de4a386 100644 --- a/ge/common/ge/plugin_manager.cc +++ b/ge/common/ge/plugin_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/ge/plugin_manager.h b/ge/common/ge/plugin_manager.h index 903367a3..b35a631a 100755 --- a/ge/common/ge/plugin_manager.h +++ b/ge/common/ge/plugin_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc index 92da8e14..3979b989 100755 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/tbe_plugin_manager.h b/ge/common/ge/tbe_plugin_manager.h index 41db8ef9..b901c6ff 100755 --- a/ge/common/ge/tbe_plugin_manager.h +++ b/ge/common/ge/tbe_plugin_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index d1e76447..e84e0077 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -1496,7 +1496,6 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map(); uint64_t var_mem_size = mem_resource_json[kVarMemSize].get(); diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index bb4502c7..36bb90cc 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index 39cd7ad7..74a9b58a 100644 --- a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/kernel_store.cc b/ge/common/kernel_store.cc index e339b30c..1bea58d8 100755 --- a/ge/common/kernel_store.cc +++ b/ge/common/kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/kernel_store.h b/ge/common/kernel_store.h index b3f4a62e..d73f26c5 100755 --- a/ge/common/kernel_store.h +++ b/ge/common/kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/fp16_math.cc b/ge/common/math/fp16_math.cc index e465c953..56183ced 100755 --- a/ge/common/math/fp16_math.cc +++ b/ge/common/math/fp16_math.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/fp16_math.h b/ge/common/math/fp16_math.h index 48559eb3..c3a4eb28 100755 --- a/ge/common/math/fp16_math.h +++ b/ge/common/math/fp16_math.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/math_util.h b/ge/common/math/math_util.h index 3255e3c1..b96ef5fa 100755 --- a/ge/common/math/math_util.h +++ b/ge/common/math/math_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/math_util.h b/ge/common/math_util.h index 913a1572..bcb7ebc4 100755 --- a/ge/common/math_util.h +++ b/ge/common/math_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/model_parser/base.cc b/ge/common/model_parser/base.cc index bc38cea8..3b6b9407 100644 --- a/ge/common/model_parser/base.cc +++ b/ge/common/model_parser/base.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/model_parser/base.h b/ge/common/model_parser/base.h index 75db8b11..22d58ace 100755 --- a/ge/common/model_parser/base.h +++ b/ge/common/model_parser/base.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/model_saver.cc b/ge/common/model_saver.cc index fb1cd0a7..1a623199 100755 --- a/ge/common/model_saver.cc +++ b/ge/common/model_saver.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/model_saver.h b/ge/common/model_saver.h index 6da0a78c..411d5e35 100644 --- a/ge/common/model_saver.h +++ b/ge/common/model_saver.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/op/attr_value_util.cc b/ge/common/op/attr_value_util.cc index 4315a25d..ae7576a8 100644 --- a/ge/common/op/attr_value_util.cc +++ b/ge/common/op/attr_value_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/op/ge_op_utils.cc b/ge/common/op/ge_op_utils.cc index 579190d6..d7d56ec5 100644 --- a/ge/common/op/ge_op_utils.cc +++ b/ge/common/op/ge_op_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 9a2b24a0..ebb50027 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,12 +51,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana return profiling_manager; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options, - bool convert_2_phy_device_id) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); job_id_ = options.job_id; + GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); + + + Status ret; if (!recv_profiling_config_.empty()) { GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); @@ -64,18 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } else { ret = InitFromOptions(options); if (ret == SUCCESS && is_load_profiling_) { - // profiling need phy device id - if (!convert_2_phy_device_id) { - device_id_.push_back(options.device_id); - } else { - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED; - } - device_id_.push_back(phy_device_id); - } + device_id_.push_back(options.device_id); } } if (ret != SUCCESS) { @@ -554,25 +546,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr return; } GELOGI("current logic_device_id:%d", logic_device_id); - - uint32_t phy_device_id = 0; - rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); - return; - } - GELOGI("current phy_device_id:%d", phy_device_id); if (!is_acl_api_mode_) { - auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); + auto ret = std::find(device_id_.begin(), device_id_.end(), 
logic_device_id); if (ret == device_id_.end()) { GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); return; } } GELOGI("start ProfilingTaskDescInfo."); - ProfilingTaskDescInfo(task_desc_info, phy_device_id); + ProfilingTaskDescInfo(task_desc_info, logic_device_id); GELOGI("start ProfilingGraphDescInfo."); - ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id); + ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); GELOGI("Report profiling data for GE end."); #endif } diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index a8f16deb..ed14f573 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,7 +69,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ProfilingManager(); virtual ~ProfilingManager(); static ProfilingManager &Instance(); - ge::Status Init(const Options &options, bool convert_2_phy_device_id = false); + ge::Status Init(const Options &options); ge::Status InitFromOptions(const Options &options); ge::Status InitFromAclCfg(const std::string &config); ge::Status StartProfiling(int32_t iter, int32_t device_id); diff --git a/ge/common/properties_manager.cc b/ge/common/properties_manager.cc index 3ca5bd27..99aeefb1 100644 --- a/ge/common/properties_manager.cc +++ b/ge/common/properties_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/properties_manager.h b/ge/common/properties_manager.h index 634113a8..9ba7f88e 100644 --- a/ge/common/properties_manager.h +++ b/ge/common/properties_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/singleton.h b/ge/common/singleton.h index 314e824e..1a347bfe 100755 --- a/ge/common/singleton.h +++ b/ge/common/singleton.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_COMMON_SINGLETON_H_ #define GE_COMMON_SINGLETON_H_ diff --git a/ge/common/tbe_kernel_store.cc b/ge/common/tbe_kernel_store.cc index 2fb9a04a..d988d751 100755 --- a/ge/common/tbe_kernel_store.cc +++ b/ge/common/tbe_kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/tbe_kernel_store.h b/ge/common/tbe_kernel_store.h index 6304af50..ab1ab9b4 100755 --- a/ge/common/tbe_kernel_store.h +++ b/ge/common/tbe_kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/thread_pool.cc b/ge/common/thread_pool.cc index dead0127..700892f2 100644 --- a/ge/common/thread_pool.cc +++ b/ge/common/thread_pool.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/thread_pool.h b/ge/common/thread_pool.h index e173618f..92157275 100755 --- a/ge/common/thread_pool.h +++ b/ge/common/thread_pool.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/types.cc b/ge/common/types.cc index 1b96b094..b1847014 100755 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -384,6 +384,7 @@ REGISTER_OPTYPE_DEFINE(HCOMREDUCESCATTER, "HcomReduceScatter"); REGISTER_OPTYPE_DEFINE(HCOMSEND, "HcomSend"); REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); +REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); diff --git a/ge/common/util.cc b/ge/common/util.cc index e41f3dbd..3aa1df7a 100644 --- a/ge/common/util.cc +++ b/ge/common/util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -472,7 +472,7 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str return true; } - ret = regexec(®, str.c_str(), 0, nullptr, 0); + ret = regexec(®, str.c_str(), 0, NULL, 0); if (ret) { regerror(ret, ®, ebuff, kMaxBuffSize); GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 6c22c38a..2a2719d0 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/executor/module.mk b/ge/executor/module.mk index bb642da9..8638ff91 100755 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -50,6 +50,7 @@ local_ge_executor_src_files := \ ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + ../opskernel_manager/ops_kernel_builder_manager.cc \ ../single_op/single_op_manager.cc \ ../single_op/single_op_model.cc \ ../single_op/single_op.cc \ @@ -104,6 +105,12 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files) LOCAL_C_INCLUDES := $(local_ge_executor_c_include) LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) + +LOCAL_SHARED_LIBRARIES += libascend_hal + +LOCAL_STATIC_LIBRARIES := \ + libmsprofiler \ + ifeq ($(device_os),android) LOCAL_LDFLAGS += -ldl LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog @@ -140,6 +147,10 @@ LOCAL_SHARED_LIBRARIES := \ libregister \ libmsprof \ liberror_manager \ + stub/libascend_hal \ + +LOCAL_STATIC_LIBRARIES := \ + libmsprofiler \ LOCAL_LDFLAGS += $(local_ge_executor_ldflags) diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 36bcb603..9a0bbd94 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -42,6 +42,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ session/session_manager.cc \ engine_manager/dnnengine_manager.cc \ opskernel_manager/ops_kernel_manager.cc \ + opskernel_manager/ops_kernel_builder_manager.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ graph/manager/graph_context.cc \ @@ -57,6 +58,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ graph/partition/engine_place.cc \ graph/partition/graph_partition.cc \ graph/partition/dynamic_shape_partition.cc \ + graph/partition/stage_partition.cc \ generator/ge_generator.cc \ generator/generator_api.cc \ graph/manager/graph_var_manager.cc \ @@ -357,7 
+359,7 @@ LOCAL_MODULE := libge_compiler LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 # from ome_inference.mk -LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -420,7 +422,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_compiler LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE LOCAL_CFLAGS += -DOMG_DEVICE_VERSION LOCAL_CFLAGS += -O2 LOCAL_MODULE_CLASS := SHARED_LIBRARIES diff --git a/ge/ge_local_engine/common/constant/constant.h b/ge/ge_local_engine/common/constant/constant.h index 42084f2a..c517d267 100644 --- a/ge/ge_local_engine/common/constant/constant.h +++ b/ge/ge_local_engine/common/constant/constant.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/engine/ge_local_engine.cc b/ge/ge_local_engine/engine/ge_local_engine.cc index 58f24d45..9525e81b 100755 --- a/ge/ge_local_engine/engine/ge_local_engine.cc +++ b/ge/ge_local_engine/engine/ge_local_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_local_engine/engine/ge_local_engine.h b/ge/ge_local_engine/engine/ge_local_engine.h index 65dfe65b..e5f9a24d 100644 --- a/ge/ge_local_engine/engine/ge_local_engine.h +++ b/ge/ge_local_engine/engine/ge_local_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index bee9db76..6bda6e51 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "host_cpu_engine.h" #include #include "graph/common/omg_util.h" @@ -31,26 +32,6 @@ namespace { case (DTYPE): { \ GeTensorPtr ge_tensor = nullptr; \ if (need_create_flag) { \ - int64_t num_size = out_desc.GetShape().IsScalar() ? 
1 : out_desc.GetShape().GetShapeSize(); \ - if (out_desc.GetShape().IsUnknownShape()) { \ - std::vector> range; \ - if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { \ - GELOGE(INTERNAL_ERROR, "Get shape range failed, node:%s", op_desc->GetName().c_str()); \ - return INTERNAL_ERROR; \ - } \ - int64_t max_range_size = 1; \ - for (const auto &item : range) { \ - FMK_INT64_MULCHECK(max_range_size, item.second); \ - max_range_size *= item.second; \ - } \ - num_size = max_range_size; \ - } \ - if (num_size < 0) { \ - GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed, num=%lld", \ - op_desc->GetName().c_str(), i, num_size); \ - return INTERNAL_ERROR; \ - } \ - auto data_num = static_cast(num_size); \ GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ if (buf == nullptr) { \ @@ -91,6 +72,29 @@ const char *kEnvKeyOppPath = "ASCEND_OPP_PATH"; const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu"; } +Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { + int64_t num_size = out_desc.GetShape().IsScalar() ? 
1 : out_desc.GetShape().GetShapeSize(); + if (out_desc.GetShape().IsUnknownShape()) { + std::vector> range; + if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get shape range failed."); + return INTERNAL_ERROR; + } + int64_t max_range_size = 1; + for (const auto& item : range) { + FMK_INT64_MULCHECK(max_range_size, item.second); + max_range_size *= item.second; + } + num_size = max_range_size; + } + if (num_size < 0) { + GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size); + return INTERNAL_ERROR; + } + data_num = static_cast(num_size); + return SUCCESS; +} + void HostCpuEngine::CloseSo() { for (auto handle : lib_handles_) { if (dlclose(handle) != 0) { @@ -173,13 +177,20 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector &outputs, map &named_outputs) { if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { - GELOGW("size of ouputs not match, size of outputs = %zu, exactly output_num=%zu.", + GELOGW("size of outputs not match, size of outputs = %zu, exactly output_num=%zu.", outputs.size(), op_desc->GetOutputsSize()); outputs.clear(); } bool need_create_flag = (outputs.size() != op_desc->GetOutputsSize()); for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { const auto &out_desc = op_desc->GetOutputDesc(i); + uint64_t data_num = 0; + if (need_create_flag) { + if (GetDataNumber(out_desc, data_num) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed", op_desc->GetName().c_str(), i); + return INTERNAL_ERROR; + } + } switch (out_desc.GetDataType()) { CREATE_OUTPUT_CASE(DT_BOOL, bool) CREATE_OUTPUT_CASE(DT_INT8, int8_t) diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h index cc6b578c..1987138d 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/ge/ge_local_engine/engine/host_cpu_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 
Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ @@ -20,7 +21,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" #include "graph/operator.h" -#include "register/register.h" +#include "inc/register/register.h" namespace ge { class HostCpuEngine { diff --git a/ge/ge_local_engine/module.mk b/ge/ge_local_engine/module.mk index 574f08b8..3daa9686 100755 --- a/ge/ge_local_engine/module.mk +++ b/ge/ge_local_engine/module.mk @@ -48,7 +48,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libge_local_engine LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := @@ -124,3 +124,23 @@ LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) LOCAL_C_INCLUDES := $(local_lib_inc_path) include ${BUILD_HOST_STATIC_LIBRARY} + +#compiler for device libge_local_opskernel_builder.a +include $(CLEAR_VARS) +LOCAL_MODULE := libge_local_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := libprotobuf \ + libregister \ + libgraph \ + +LOCAL_SHARED_LIBRARIES := libc_sec \ + libslog \ + +LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_STATIC_LIBRARY} \ No newline at end of file diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc index 9496d0fc..27a6c01f 100644 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc +++ 
b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h index 8a7dafe2..8cb20451 100644 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc index 773abd21..73f51e51 100755 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h index 3dbef99e..00636859 100755 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc index b2f3d095..badca5a3 100755 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h index 55587b2e..ebaeef2d 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.cc b/ge/ge_local_engine/ops_kernel_store/op/no_op.cc index 51c65ce0..62fe1b5d 100755 --- a/ge/ge_local_engine/ops_kernel_store/op/no_op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.h b/ge/ge_local_engine/ops_kernel_store/op/no_op.h index 40e5766b..31199b25 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/no_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.cc b/ge/ge_local_engine/ops_kernel_store/op/op.cc index 11229b2c..0a5625de 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.h b/ge/ge_local_engine/ops_kernel_store/op/op.h index c5a3df7a..cc73c01a 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc index c57b4f4d..49fc1084 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h index 0faab508..6d0c16f4 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 6947e679..f654867d 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -111,6 +111,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/mark_same_addr_pass.cc \ graph/passes/mark_graph_unknown_status_pass.cc \ graph/partition/dynamic_shape_partition.cc \ + graph/partition/stage_partition.cc \ graph/passes/base_pass.cc \ graph/passes/bitcast_pass.cc \ graph/passes/cast_remove_pass.cc \ @@ -243,6 +244,7 @@ LIBGE_LOCAL_SRC_FILES := \ model/ge_root_model.cc \ omm/csa_interact.cc \ opskernel_manager/ops_kernel_manager.cc \ + opskernel_manager/ops_kernel_builder_manager.cc \ session/inner_session.cc \ session/session_manager.cc \ single_op/single_op.cc \ @@ -359,6 +361,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ libadump_server \ + libmsprofiler \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/ge/ge_runtime/model_context.h b/ge/ge_runtime/model_context.h index 8860f0da..259ff91f 100755 --- a/ge/ge_runtime/model_context.h +++ b/ge/ge_runtime/model_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,8 +27,13 @@ class ModelContext { ModelContext(uint32_t device_id, uint64_t session_id, int32_t priority, rtModel_t rt_model_handle, rtStream_t rt_model_stream, const std::vector &stream_list, const std::vector &label_list, const std::vector &event_list) - : device_id_(device_id), session_id_(session_id), priority_(priority), rt_model_handle_(rt_model_handle), - rt_model_stream_(rt_model_stream), stream_list_(stream_list), label_list_(label_list), + : device_id_(device_id), + session_id_(session_id), + priority_(priority), + rt_model_handle_(rt_model_handle), + rt_model_stream_(rt_model_stream), + stream_list_(stream_list), + label_list_(label_list), event_list_(event_list) {} ~ModelContext() {} diff --git a/ge/ge_runtime/model_runner.cc b/ge/ge_runtime/model_runner.cc index 2c2efde4..9961ab4e 100644 --- a/ge/ge_runtime/model_runner.cc +++ b/ge/ge_runtime/model_runner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ namespace ge { namespace model_runner { + using RuntimeModelPtr = std::shared_ptr; using DavinciModelPtr = std::shared_ptr; diff --git a/ge/ge_runtime/output.cc b/ge/ge_runtime/output.cc index eec8d170..5153f688 100644 --- a/ge/ge_runtime/output.cc +++ b/ge/ge_runtime/output.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ bool Output::CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_inde DataBuffer data_buf = rslt->blobs[data_begin + data_count]; bool ret = SetDataBuf(data_buf, data_begin, data_count, i, support_mem_share); if (!ret) { - GELOGE(FAILED, "Copy data to host failed. 
index: %lu, addr: %p", i, v_input_data_addr_[i]); + GELOGE(FAILED, "Copy data to host error. index: %lu, addr: %p", i, v_input_data_addr_[i]); return ret; } data_index = data_begin + data_count; @@ -89,5 +89,6 @@ bool Output::SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &dat bool support_mem_share) { return true; } + } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/output.h b/ge/ge_runtime/output.h index 13ea956d..1f7f91ee 100755 --- a/ge/ge_runtime/output.h +++ b/ge/ge_runtime/output.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ namespace ge { namespace model_runner { + class Output { public: Output(const OpInfoPtr &op_info, const std::shared_ptr &model); @@ -32,8 +33,7 @@ class Output { bool CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share); - bool SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, - bool support_mem_share); + bool SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, bool support_mem_share); // Copy assignment operator and copy constructor are deleted Output &operator=(const Output &output) = delete; diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc index 0b76cbaf..f0405056 100644 --- a/ge/ge_runtime/runtime_model.cc +++ b/ge/ge_runtime/runtime_model.cc @@ -74,8 +74,8 @@ bool RuntimeModel::InitStream(std::shared_ptr &davinci_model) { for (uint32_t i = 0; i < davinci_model->GetStreamNum(); ++i) { rtStream_t stream = nullptr; uint32_t flag = (force_copy_streams.find(i) != force_copy_streams.end()) - ? (RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) - : (RT_STREAM_PERSISTENT); + ? 
(RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) + : (RT_STREAM_PERSISTENT); rtError_t rt_ret = rtStreamCreateWithFlags(&stream, davinci_model->GetPriority(), flag); if (rt_ret != RT_ERROR_NONE) { @@ -115,23 +115,34 @@ bool RuntimeModel::InitEvent(uint32_t event_num) { return true; } -bool RuntimeModel::InitLabel(uint32_t batch_num) { - GELOGI("batch number:%u.", batch_num); - for (uint32_t i = 0; (batch_num != 0 && i <= batch_num); ++i) { - rtLabel_t rt_lLabel = nullptr; - rtError_t rt_ret = rtLabelCreate(&rt_lLabel); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, i; %u; ret: 0x%X", i, rt_ret); - return false; +bool RuntimeModel::InitLabel(std::shared_ptr &davinci_model) { + GELOGI("batch number:%u.", davinci_model->GetBatchNum()); + label_list_.resize(davinci_model->GetBatchNum()); + for (auto &task_info : davinci_model->GetTaskInfoList()) { + if (task_info == nullptr) { + GELOGE(PARAM_INVALID, "task_info is null."); + continue; } - if (rt_lLabel == nullptr) { - GELOGE(RT_FAILED, "rtLabel is nullptr!"); + if (task_info->type() != TaskInfoType::LABEL_SET) { + continue; + } + auto label_set_task_info = std::static_pointer_cast(task_info); + + if (label_set_task_info->stream_id() >= stream_list_.size()) { + GELOGE(PARAM_INVALID, "Invalid stream id."); return false; } - label_list_.emplace_back(rt_lLabel); + rtLabel_t rt_label = nullptr; + rtError_t rt_ret = rtLabelCreateEx(&rt_label, stream_list_[label_set_task_info->stream_id()]); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, ret: 0x%X", rt_ret); + return false; + } + label_list_[label_set_task_info->label_id()] = rt_label; } + return true; } @@ -163,7 +174,7 @@ bool RuntimeModel::InitResource(std::shared_ptr &davinci_model) { return false; } - if (!InitLabel(davinci_model->GetBatchNum())) { + if (!InitLabel(davinci_model)) { return false; } @@ -281,7 +292,6 @@ bool RuntimeModel::DistributeTask() { GELOGE(FAILED, "DistributeTask 
failed"); return false; } - return true; } @@ -293,10 +303,14 @@ bool RuntimeModel::Run() { return false; } - GELOGI("Run rtModelExecute success"); + GELOGI("Run rtModelExecute success, ret = 0x%X", ret); ret = rtStreamSynchronize(rt_model_stream_); if (ret != RT_ERROR_NONE) { + if (ret == RT_ERROR_END_OF_SEQUENCE) { + GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret); + return true; + } GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); return false; } @@ -456,7 +470,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model } if (constant->output_tensors[0].size < constant->weight_data.size()) { - GELOGE(PARAM_INVALID, "Output size:%u is less than weight data size:%zu", constant->output_tensors[0].size, + GELOGE(PARAM_INVALID, "Output size:%u less than weight data size:%zu", constant->output_tensors[0].size, constant->weight_data.size()); return false; } @@ -471,11 +485,8 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model /// The logic of GetShapeSize is wrong, the scaler tensor's GetShapeSize is zero /// and that of unknown shape is zero too. /// Unknown shape will not appear here, so we can use zero judge a tensor is scaler or not. - int64_t elem_num = constant->weight_tensors[0].GetShapeSize(); - if (elem_num == 0 && constant->weight_tensors[0].size == 0) { - elem_num = 1; - } - + int64_t elem_num = + (constant->weight_tensors[0].GetShapeSize() == 0) ? 
1 : constant->weight_tensors[0].GetShapeSize(); if (constant->weight_data.size() < sizeof(uint64_t)) { GELOGE(FAILED, "weight_data size is smaller than sizeof(uint64_t)"); return false; diff --git a/ge/ge_runtime/runtime_model.h b/ge/ge_runtime/runtime_model.h index 6109915f..d0c466d4 100644 --- a/ge/ge_runtime/runtime_model.h +++ b/ge/ge_runtime/runtime_model.h @@ -40,13 +40,11 @@ class RuntimeModel { const std::vector &GetTaskIdList() const; const std::vector &GetStreamIdList() const; const std::map> &GetRuntimeInfoMap() const { return runtime_info_map_; } - const rtModel_t GetModelHandle() const { return rt_model_handle_; } + rtModel_t GetModelHandle() const { return rt_model_handle_; } bool Run(); bool CopyInputData(const InputData &input_data); - bool GetInputOutputDescInfo(bool zero_copy, - std::vector *input_desc, - std::vector *output_desc, - std::vector *input_format, + bool GetInputOutputDescInfo(bool zero_copy, std::vector *input_desc, + std::vector *output_desc, std::vector *input_format, std::vector *output_format); private: @@ -55,7 +53,7 @@ class RuntimeModel { bool LoadTask(); bool InitStream(std::shared_ptr &davinci_model); bool InitEvent(uint32_t event_num); - bool InitLabel(uint32_t batch_num); + bool InitLabel(std::shared_ptr &davinci_model); bool InitDataInfo(std::shared_ptr &davinci_model); bool InitOutputInfo(std::shared_ptr &davinci_model); bool InitConstantInfo(std::shared_ptr &davinci_model); @@ -87,6 +85,7 @@ class RuntimeModel { std::vector stream_id_list_{}; std::map> runtime_info_map_; }; + } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/aicpu_task.cc b/ge/ge_runtime/task/aicpu_task.cc index 61ef7a3c..5b3d8e82 100755 --- a/ge/ge_runtime/task/aicpu_task.cc +++ b/ge/ge_runtime/task/aicpu_task.cc @@ -26,6 +26,7 @@ AicpuTask::AicpuTask(const ModelContext &model_context, const std::shared_ptr(io_addrs.size()); auto io_addrs_size = static_cast(io_addrs_num * sizeof(void *)); constexpr uint32_t io_addr_offset = 
sizeof(aicpu::AicpuParamHead); - uint32_t node_def_addr_offset = io_addr_offset + io_addrs_size; - uint32_t args_size = - sizeof(aicpu::AicpuParamHead) + io_addrs_size + static_cast(task_info_->node_def().size()); - aicpu::AicpuParamHead aicpu_param_head = {args_size, io_addrs_num}; + uint32_t node_def_len_offset = io_addr_offset + io_addrs_size; + uint32_t node_def_addr_offset = node_def_len_offset + sizeof(uint32_t); + uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addrs_size + + static_cast(task_info_->node_def().size()) + sizeof(uint32_t); + + aicpu::AicpuParamHead aicpu_param_head; + aicpu_param_head.length = args_size; + aicpu_param_head.ioAddrNum = io_addrs_num; + auto ext_info = task_info_->ext_info(); + uint32_t ext_size = ext_info.size(); + if (ext_info.empty()) { + aicpu_param_head.extInfoLength = 0; + aicpu_param_head.extInfoAddr = 0; + } else { + rtError_t flag = rtMalloc(&ext_info_, ext_size, RT_MEMORY_HBM); + if (flag != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X.", flag); + return false; + } + + flag = rtMemcpy(ext_info_, ext_size, const_cast(reinterpret_cast(ext_info.data())), ext_size, + RT_MEMCPY_HOST_TO_DEVICE); + if (flag != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMemCpy) failed, ret: 0x%X.", flag); + return false; + } + + GELOGI("ext info size:", ext_size); + aicpu_param_head.extInfoLength = ext_size; + aicpu_param_head.extInfoAddr = reinterpret_cast(ext_info_); + } // Malloc device memory for args rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); @@ -80,6 +111,17 @@ bool AicpuTask::Distribute() { return false; } } + + // Memcpy node def + auto size = task_info_->node_def().size(); + rt_ret = + rtMemcpy(reinterpret_cast(reinterpret_cast(args_) + node_def_len_offset), sizeof(uint32_t), + reinterpret_cast(&size), sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X.", rt_ret); + return false; 
+ } + // Memcpy node def rt_ret = rtMemcpy(reinterpret_cast(reinterpret_cast(args_) + node_def_addr_offset), task_info_->node_def().size(), reinterpret_cast(task_info_->node_def().data()), diff --git a/ge/ge_runtime/task/aicpu_task.h b/ge/ge_runtime/task/aicpu_task.h index cc21af8a..2d3c5040 100755 --- a/ge/ge_runtime/task/aicpu_task.h +++ b/ge/ge_runtime/task/aicpu_task.h @@ -41,6 +41,7 @@ class AicpuTask : public TaskRepeater { std::shared_ptr task_info_; void *stream_; void *args_; + void *ext_info_; void *input_output_addr_; }; } // namespace model_runner diff --git a/ge/ge_runtime/task/cce_task.cc b/ge/ge_runtime/task/cce_task.cc index 1c1807b5..04fd5610 100755 --- a/ge/ge_runtime/task/cce_task.cc +++ b/ge/ge_runtime/task/cce_task.cc @@ -103,9 +103,9 @@ bool CceTask::Distribute() { // Modify flowtable addr in args auto args = const_cast(task_info_->args().data()); auto task_offset = reinterpret_cast(const_cast(task_info_->args_offset().data())); + if (task_info_->args().size() < (task_offset[0] + sizeof(uint64_t))) { - GELOGE(FAILED, - "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu", + GELOGE(FAILED, "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu", static_cast(task_offset[0]), sizeof(uint64_t), task_info_->args().size()); return false; } @@ -136,8 +136,7 @@ bool CceTask::Distribute() { return false; } - rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), - task_info_->sm_desc().data(), + rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), task_info_->sm_desc().data(), task_info_->sm_desc().size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); @@ -146,12 +145,8 @@ bool CceTask::Distribute() { } // Kernel launch - rt_ret = rtKernelLaunch(stub_func_, - task_info_->block_dim(), - args_, - task_info_->args_size(), - static_cast(sm_desc_), - stream_); + rt_ret = rtKernelLaunch(stub_func_, 
task_info_->block_dim(), args_, task_info_->args_size(), + static_cast(sm_desc_), stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return false; diff --git a/ge/ge_runtime/task/event_record_task.h b/ge/ge_runtime/task/event_record_task.h index b9ae5dba..7c1d4f80 100755 --- a/ge/ge_runtime/task/event_record_task.h +++ b/ge/ge_runtime/task/event_record_task.h @@ -33,7 +33,7 @@ class EventRecordTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/event_wait_task.cc b/ge/ge_runtime/task/event_wait_task.cc index 5f1ffaad..558c2a59 100644 --- a/ge/ge_runtime/task/event_wait_task.cc +++ b/ge/ge_runtime/task/event_wait_task.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_runtime/task/event_wait_task.h b/ge/ge_runtime/task/event_wait_task.h index 685be897..9104bbf8 100755 --- a/ge/ge_runtime/task/event_wait_task.h +++ b/ge/ge_runtime/task/event_wait_task.h @@ -33,7 +33,7 @@ class EventWaitTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc index 771341c1..3d5f8504 100644 --- a/ge/ge_runtime/task/hccl_task.cc +++ b/ge/ge_runtime/task/hccl_task.cc @@ -115,7 +115,6 @@ bool HcclTask::Distribute() { rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - (void)rtStreamDestroy(stream); return false; } @@ -129,8 +128,6 @@ bool HcclTask::Distribute() { ge_task.type = static_cast(RT_MODEL_TASK_HCCL); ge_task.stream = stream_; - GETaskKernelHcclInfo kernel_hccl_info; - ge_task.kernelHcclInfo.emplace_back(kernel_hccl_info); ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type(); ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr(); ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr(); diff --git a/ge/ge_runtime/task/label_goto_task.cc b/ge/ge_runtime/task/label_goto_task.cc new file mode 100644 index 00000000..d357accb --- /dev/null +++ b/ge/ge_runtime/task/label_goto_task.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ge_runtime/task/label_goto_task.h" +#include "ge_runtime/task/task_factory.h" + +namespace ge { +namespace model_runner { +LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info) + : TaskRepeater(model_context, task_info), + task_info_(task_info), + stream_(nullptr), + label_(nullptr) { + if (task_info_ == nullptr) { + GELOGW("task_info_ is null!"); + return; + } + auto stream_list = model_context.stream_list(); + auto label_list = model_context.label_list(); + uint32_t stream_id = task_info->stream_id(); + uint32_t label_id = task_info->label_id(); + GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); + GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); + if (stream_id >= stream_list.size() || label_id >= label_list.size()) { + GELOGW("Stream/Label id invalid."); + return; + } + stream_ = stream_list[stream_id]; + label_ = label_list[label_id]; +} + +LabelGotoTask::~LabelGotoTask() {} + +bool LabelGotoTask::Distribute() { + GELOGI("LabelGotoTask Distribute start."); + if (stream_ == nullptr) { + GELOGE(PARAM_INVALID, "stream is null!"); + return false; + } + if (label_ == nullptr) { + GELOGE(PARAM_INVALID, "label is null!"); + return false; + } + rtError_t rt_ret = rtLabelGotoEx(label_, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + GELOGI("DistributeTask end."); + return true; +} + +REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo); + 
+} // namespace model_runner +} // namespace ge diff --git a/inc/common/util/compress/compress_weight.h b/ge/ge_runtime/task/label_goto_task.h similarity index 50% rename from inc/common/util/compress/compress_weight.h rename to ge/ge_runtime/task/label_goto_task.h index 34ea47d1..4fd6d1bc 100644 --- a/inc/common/util/compress/compress_weight.h +++ b/ge/ge_runtime/task/label_goto_task.h @@ -14,20 +14,28 @@ * limitations under the License. */ -#ifndef COMPRESS_WEIGHT_H -#define COMPRESS_WEIGHT_H +#ifndef GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ +#define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ -#include "compress.h" +#include +#include "ge_runtime/task/task.h" -const int SHAPE_SIZE_WEIGHT = 4; +namespace ge { +namespace model_runner { +class LabelGotoTask : public TaskRepeater { + public: + LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info); -struct CompressOpConfig { - int64_t wShape[SHAPE_SIZE_WEIGHT]; - size_t compressTilingK; - size_t compressTilingN; - struct CompressConfig compressConfig; + ~LabelGotoTask() override; + + bool Distribute() override; + + private: + std::shared_ptr task_info_; + void *stream_; + void *label_; }; +} // namespace model_runner +} // namespace ge -extern "C" CmpStatus CompressWeightsConv2D(const char *const input, char *const zipBuffer, char *const infoBuffer, - CompressOpConfig *const param); -#endif // COMPRESS_WEIGHT_H +#endif // GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ diff --git a/ge/ge_runtime/task/label_set_task.cc b/ge/ge_runtime/task/label_set_task.cc new file mode 100644 index 00000000..3ab5802c --- /dev/null +++ b/ge/ge_runtime/task/label_set_task.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ge_runtime/task/label_set_task.h" +#include "ge_runtime/task/task_factory.h" + +namespace ge { +namespace model_runner { +LabelSetTask::LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info) + : TaskRepeater(model_context, task_info), + task_info_(task_info), + stream_(nullptr), + label_(nullptr) { + if (task_info_ == nullptr) { + GELOGW("task_info_ is null!"); + return; + } + auto stream_list = model_context.stream_list(); + auto label_list = model_context.label_list(); + uint32_t stream_id = task_info->stream_id(); + uint32_t label_id = task_info->label_id(); + GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); + GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); + if (stream_id >= stream_list.size() || label_id >= label_list.size()) { + GELOGW("Stream/Label id invalid."); + return; + } + stream_ = stream_list[stream_id]; + label_ = label_list[label_id]; +} + +LabelSetTask::~LabelSetTask() {} + +bool LabelSetTask::Distribute() { + GELOGI("LabelSetTask Distribute start."); + if (stream_ == nullptr) { + GELOGE(PARAM_INVALID, "stream is null!"); + return false; + } + if (label_ == nullptr) { + GELOGE(PARAM_INVALID, "label is null!"); + return false; + } + rtError_t rt_ret = rtLabelSet(label_, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + GELOGI("DistributeTask end."); + return true; +} + +REGISTER_TASK(TaskInfoType::LABEL_SET, LabelSetTask, LabelSetTaskInfo); + +} // 
namespace model_runner +} // namespace ge diff --git a/inc/common/optimizer/graph_optimizer_types.h b/ge/ge_runtime/task/label_set_task.h similarity index 53% rename from inc/common/optimizer/graph_optimizer_types.h rename to ge/ge_runtime/task/label_set_task.h index 9e1ec96b..70bf1584 100644 --- a/inc/common/optimizer/graph_optimizer_types.h +++ b/ge/ge_runtime/task/label_set_task.h @@ -14,21 +14,28 @@ * limitations under the License. */ -#ifndef INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_ -#define INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_ +#ifndef GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ +#define GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ + +#include +#include "ge_runtime/task/task.h" -#include -#include namespace ge { -enum OPTIMIZER_SCOPE { - UNIT = 0, - ENGINE, -}; +namespace model_runner { +class LabelSetTask : public TaskRepeater { + public: + LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info); + + ~LabelSetTask() override; + + bool Distribute() override; -struct GraphOptimizerAttribute { - std::string engineName; - OPTIMIZER_SCOPE scope; + private: + std::shared_ptr task_info_; + void *stream_; + void *label_; }; +} // namespace model_runner } // namespace ge -#endif // INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_ +#endif // GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ diff --git a/ge/ge_runtime/task/label_switch_task.cc b/ge/ge_runtime/task/label_switch_task.cc new file mode 100644 index 00000000..a3c2d41a --- /dev/null +++ b/ge/ge_runtime/task/label_switch_task.cc @@ -0,0 +1,131 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ge_runtime/task/label_switch_task.h" +#include "ge_runtime/task/task_factory.h" + +namespace ge { +namespace model_runner { +LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, + const std::shared_ptr &task_info) + : TaskRepeater(model_context, task_info), + task_info_(task_info), + stream_(nullptr), + all_label_resource_(), + label_info_(nullptr) { + if (task_info_ == nullptr) { + GELOGW("task_info_ is null!"); + return; + } + + all_label_resource_ = model_context.label_list(); + auto stream_list = model_context.stream_list(); + uint32_t stream_id = task_info->stream_id(); + GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); + if (stream_id >= stream_list.size()) { + GELOGW("Stream id invalid."); + return; + } + stream_ = stream_list[stream_id]; +} + +LabelSwitchTask::~LabelSwitchTask() { + if (label_info_ != nullptr) { + rtError_t rt_ret = rtFree(label_info_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! 
ret: 0x%X.", rt_ret); + } + label_info_ = nullptr; + } +} + +bool LabelSwitchTask::Distribute() { + GELOGI("LabelSwitchTask Distribute start."); + if (!CheckParamValid()) { + return false; + } + + const std::vector &label_index_list = task_info_->label_list(); + std::vector label_list(task_info_->label_size(), nullptr); + + for (size_t i = 0; i < task_info_->label_size(); ++i) { + uint32_t label_index = label_index_list[i]; + if (label_index >= all_label_resource_.size()) { + GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index, + all_label_resource_.size()); + return false; + } + label_list[i] = all_label_resource_[label_index]; + GELOGI("Case %zu: label id %zu.", i, label_index); + } + + uint32_t label_info_size = sizeof(rtLabelDevInfo) * task_info_->label_size(); + rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + GELOGI("DistributeTask end."); + return true; +} + +bool LabelSwitchTask::CheckParamValid() { + if (stream_ == nullptr) { + GELOGE(PARAM_INVALID, "stream is null!"); + return false; + } + + if (task_info_->label_list().empty()) { + GELOGE(PARAM_INVALID, "label_list is empty."); + return false; + } + + if (task_info_->label_size() != task_info_->label_list().size()) { + GELOGE(PARAM_INVALID, "label_list size %zu but label_size is %u.", task_info_->label_list().size(), + task_info_->label_size()); + return false; + } + + if (task_info_->label_size() >= 
UINT32_MAX / sizeof(rtLabelDevInfo)) { + GELOGE(PARAM_INVALID, "label_size %u will overflow.", task_info_->label_size()); + return false; + } + + if (label_info_ != nullptr) { + GELOGE(PARAM_INVALID, "label_info_ has dirty data."); + return false; + } + + return true; +} + +REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo); + +} // namespace model_runner +} // namespace ge diff --git a/ge/ge_runtime/task/label_switch_task.h b/ge/ge_runtime/task/label_switch_task.h new file mode 100644 index 00000000..463faa31 --- /dev/null +++ b/ge/ge_runtime/task/label_switch_task.h @@ -0,0 +1,44 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ +#define GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ + +#include +#include "ge_runtime/task/task.h" + +namespace ge { +namespace model_runner { +class LabelSwitchTask : public TaskRepeater { + public: + LabelSwitchTask(const ModelContext &model_context, const std::shared_ptr &task_info); + + ~LabelSwitchTask() override; + + bool Distribute() override; + + private: + bool CheckParamValid(); + + std::shared_ptr task_info_; + void *stream_; + std::vector all_label_resource_; + void *label_info_; +}; +} // namespace model_runner +} // namespace ge + +#endif // GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ diff --git a/ge/ge_runtime/task/stream_switch_task.cc b/ge/ge_runtime/task/stream_switch_task.cc index 91141139..2adcb4bd 100644 --- a/ge/ge_runtime/task/stream_switch_task.cc +++ b/ge/ge_runtime/task/stream_switch_task.cc @@ -51,7 +51,7 @@ bool StreamSwitchTask::Distribute() { } if (static_cast(task_info_->true_stream_id()) >= stream_list_.size()) { - GELOGE(PARAM_INVALID, "true_stream_id %ld must be less than stream_list_ size %zu!", task_info_->true_stream_id(), + GELOGE(PARAM_INVALID, "true_stream_id %ld must less than stream_list_ size %zu!", task_info_->true_stream_id(), stream_list_.size()); return false; } diff --git a/ge/ge_runtime/task/stream_switch_task.h b/ge/ge_runtime/task/stream_switch_task.h index 2caad200..81c12507 100755 --- a/ge/ge_runtime/task/stream_switch_task.h +++ b/ge/ge_runtime/task/stream_switch_task.h @@ -37,6 +37,7 @@ class StreamSwitchTask : public TaskRepeater { void *stream_; std::vector stream_list_; }; + } // namespace model_runner } // namespace ge #endif // GE_GE_RUNTIME_TASK_STREAM_SWITCH_TASK_H_ diff --git a/ge/ge_runtime/task/task.h b/ge/ge_runtime/task/task.h index b8a937b7..6c4df248 100755 --- a/ge/ge_runtime/task/task.h +++ b/ge/ge_runtime/task/task.h @@ -42,7 +42,7 @@ class Task { template class TaskRepeater : public Task { - static_assert(std::is_base_of(), "Wrong TaskInfo 
Type!"); /*lint !e30*/ + static_assert(std::is_base_of(), "Wrong TaskInfo Type!"); public: TaskRepeater(const ModelContext &model_context, std::shared_ptr task_info) {} diff --git a/ge/ge_runtime/task/task_factory.h b/ge/ge_runtime/task/task_factory.h index 29da1388..670d1fef 100644 --- a/ge/ge_runtime/task/task_factory.h +++ b/ge/ge_runtime/task/task_factory.h @@ -81,6 +81,7 @@ class TaskFactory { std::shared_ptr concrete_task_info = std::static_pointer_cast(task_info); \ return std::make_shared(model_context, concrete_task_info); \ }); + } // namespace model_runner } // namespace ge #endif // GE_GE_RUNTIME_TASK_TASK_FACTORY_H_ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 1f91ae08..6f5c1fb4 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -222,7 +222,7 @@ static void GetOpsProtoPath(string &opsproto_path) { class GeGenerator::Impl { public: - Impl(OmgContext &omg_context) : omg_context_(omg_context), graph_manager_(omg_context) {} + Impl(OmgContext &omg_context) : omg_context_(omg_context) {} ~Impl() = default; Status BuildModel(const Graph &graph, const vector &inputs, GeRootModelPtr &ge_models); @@ -683,7 +683,7 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector static std::atomic atomic_graph_id(0); auto graph_id = atomic_graph_id.fetch_add(1); const std::map options; - Status ret = graph_manager_.AddGraph(graph_id, graph, options); + Status ret = graph_manager_.AddGraph(graph_id, graph, options, omg_context_); if (ret != SUCCESS) { GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph fail, graph id: %u", graph_id); (void)graph_manager_.Finalize(); @@ -716,7 +716,7 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) { static std::atomic atomic_graph_id(0); auto graph_id = atomic_graph_id.fetch_add(1); const std::map options; - Status ret = graph_manager_.AddGraph(graph_id, graph, options); + Status ret = graph_manager_.AddGraph(graph_id, graph, options, omg_context_); if (ret != SUCCESS) { GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, graph id: %u", graph_id); (void)graph_manager_.Finalize(); diff --git a/ge/generator/generator_api.cc b/ge/generator/generator_api.cc index 675b8811..0f003e90 100644 --- a/ge/generator/generator_api.cc +++ b/ge/generator/generator_api.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "generator/generator_api.h" #include "common/ge/ge_util.h" #include "common/util.h" diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 670e929d..5653fe32 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -370,6 +370,11 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr GELOGE(INTERNAL_ERROR, "Get memory size fail."); return INTERNAL_ERROR; } + int64_t p2p_memory_size = 0; + if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_memory_size)) { + GELOGE(INTERNAL_ERROR, "Get p2p memory size fail."); + return INTERNAL_ERROR; + } int64_t weight_size = 0; if (!AttrUtils::GetInt(model_ptr, ATTR_MODEL_WEIGHT_SIZE, weight_size)) { GELOGE(INTERNAL_ERROR, "Get weight memory size fail."); @@ -380,11 +385,21 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr auto *get_mem_base = reinterpret_cast(reinterpret_cast(var_manager->GetVarMemMaxSize())); uint8_t *get_weight_mem_base = get_mem_base; if (weight_size > 0) { - get_weight_mem_base = get_mem_base + memory_size; + get_weight_mem_base = get_mem_base + memory_size + p2p_memory_size; } - + std::map mem_type_to_data_mem_base; + mem_type_to_data_mem_base[RT_MEMORY_HBM] = get_mem_base; + if (p2p_memory_size == 0) { + mem_type_to_data_mem_base[RT_MEMORY_P2P_DDR] = nullptr; + } else { + mem_type_to_data_mem_base[RT_MEMORY_P2P_DDR] = get_mem_base + memory_size; + } + std::map mem_type_to_data_mem_size; + mem_type_to_data_mem_size[RT_MEMORY_HBM] = memory_size; + mem_type_to_data_mem_size[RT_MEMORY_P2P_DDR] = p2p_memory_size; RunContextUtil run_context; - Status ret = run_context.InitMemInfo(get_mem_base, memory_size, 
get_weight_mem_base, weight_size); + Status ret = run_context.InitMemInfo(get_mem_base, memory_size, mem_type_to_data_mem_base, mem_type_to_data_mem_size, + get_weight_mem_base, weight_size); if (ret != SUCCESS) { GELOGE(ret, "task_generator init mem info fail."); return ret; diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index 329f3ebc..a70a5464 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/label_allocator.cc b/ge/graph/build/label_allocator.cc index 0f3eff16..f8fbe28b 100644 --- a/ge/graph/build/label_allocator.cc +++ b/ge/graph/build/label_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/label_allocator.h b/ge/graph/build/label_allocator.h index 7c7b2f00..01811e1d 100644 --- a/ge/graph/build/label_allocator.h +++ b/ge/graph/build/label_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/build/logical_stream_allocator.cc b/ge/graph/build/logical_stream_allocator.cc index 5b8ce824..9dff5fc4 100644 --- a/ge/graph/build/logical_stream_allocator.cc +++ b/ge/graph/build/logical_stream_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/logical_stream_allocator.h b/ge/graph/build/logical_stream_allocator.h index e09d7cd6..280a4104 100644 --- a/ge/graph/build/logical_stream_allocator.h +++ b/ge/graph/build/logical_stream_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 61dd3462..8668e81e 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/build/memory/binary_block_mem_assigner.h" #include #include "framework/common/debug/ge_log.h" diff --git a/ge/graph/build/memory/binary_block_mem_assigner.h b/ge/graph/build/memory/binary_block_mem_assigner.h index 96a31aac..de6cae0d 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.h +++ b/ge/graph/build/memory/binary_block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 64d5aa95..c44625c9 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -399,13 +399,14 @@ string MemoryBlock::String() { for (const auto& symbol : SymbolList()) { ss << "__symbol: " << symbol << " "; } + ss << "memory_type: " << memory_type_ << " "; return ss.str(); } BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &anchor_to_symbol, const map> &symbol_to_anchors) - : mem_offset_(0), compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors), - anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} + : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), + symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} BlockMemAssigner::~BlockMemAssigner() { GELOGD("blocks_store_ size : %lu", blocks_store_.size()); @@ -503,6 +504,7 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { void AddReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { string key = std::to_string(mem_block.Size()); key += "_" + std::to_string(mem_block.stream_id_); + key += "_" + std::to_string(mem_block.memory_type_); auto it = reusable_block_counts.find(key); if (it != reusable_block_counts.end()) { it->second++; @@ -514,6 +516,7 @@ void AddReusableBlockCount(const MemoryBlock &mem_block, map & void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { string key = std::to_string(mem_block.Size()); key += "_" + std::to_string(mem_block.stream_id_); + key += "_" + std::to_string(mem_block.memory_type_); auto it = reusable_block_counts.find(key); if (it != reusable_block_counts.end()) { if (it->second > 0) { @@ -530,6 +533,7 @@ bool CanReuseBySize(const map &reusable_block_counts, const Me } else { string key = std::to_string(reusable_block.Size()); key += "_" + std::to_string(reusable_block.stream_id_); + key += "_" + std::to_string(reusable_block.memory_type_); auto it = reusable_block_counts.find(key); GE_IF_BOOL_EXEC((it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) && 
(reusable_block.Size() > block_size), @@ -610,6 +614,18 @@ void BlockMemAssigner::InitReuseFlag() { std::string symbol = pair.first; bool pre_reuse_flag = true; bool post_reuse_flag = true; + // default memory type + int64_t mem_type = RT_MEMORY_HBM; + GetSymbolMemType(pair.second, mem_type); + GELOGD("The memory type of symbol[%s] is [%ld]].", symbol.c_str(), mem_type); + if (mem_type != RT_MEMORY_HBM) { + UpdateOpTensorMemType(pair.second, mem_type); + } + // Only the memory with special requirements is processed. The HBM uses the default processing mode. + if (mem_type != RT_MEMORY_HBM) { + symbol_to_mem_type_[symbol] = mem_type; + } + for (const auto &node_index_io : pair.second) { if (node_index_io.io_type_ == kIn) { continue; @@ -725,6 +741,66 @@ void BlockMemAssigner::PrintSymbolMap() { } } +void BlockMemAssigner::GetSymbolMemType(std::list node_index_io_list, int64_t &memory_type) { + memory_type = RT_MEMORY_HBM; + vector memory_types; + for (auto &node_index_io : node_index_io_list) { + auto op_desc = node_index_io.node_->GetOpDesc(); + if (op_desc == nullptr) { + GELOGW("Node[%s] op desc is null.", node_index_io.node_->GetName().c_str()); + return; + } + + if (node_index_io.io_type_ == kIn) { + vector input_memory_types; + (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); + if (!input_memory_types.empty() && node_index_io.index_ < input_memory_types.size()) { + int64_t input_memory_type = input_memory_types[node_index_io.index_]; + GELOGD("Node[%s]: the memory type of input index [%u] is [%ld]].", op_desc->GetName().c_str(), + node_index_io.index_, input_memory_type); + memory_types.emplace_back(input_memory_type); + } + } + if (node_index_io.io_type_ == kOut) { + vector output_memory_types; + (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, output_memory_types); + if (!output_memory_types.empty() && node_index_io.index_ < output_memory_types.size()) { + int64_t output_memory_type = 
output_memory_types[node_index_io.index_]; + GELOGD("Node[%s]: the memory type of output index [%u] is [%ld]].", op_desc->GetName().c_str(), + node_index_io.index_, output_memory_type); + memory_types.emplace_back(output_memory_type); + } + } + } + + // memory priority + for (auto node_memory_type : memory_types) { + if (node_memory_type > memory_type) { + memory_type = node_memory_type; + } + } +} + +void BlockMemAssigner::UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type) { + for (auto &node_index_io : node_index_io_list) { + auto op_desc = node_index_io.node_->GetOpDesc(); + if (op_desc == nullptr) { + GELOGW("Node[%s] op desc is null.", node_index_io.node_->GetName().c_str()); + return; + } + + if (node_index_io.io_type_ == kIn) { + auto input_desc = op_desc->GetInputDesc(node_index_io.index_); + (void) AttrUtils::SetInt(input_desc, ATTR_NAME_TENSOR_MEM_TYPE, memory_type); + } + + if (node_index_io.io_type_ == kOut) { + auto output_desc = op_desc->GetOutputDesc(node_index_io.index_); + (void) AttrUtils::SetInt(output_desc, ATTR_NAME_TENSOR_MEM_TYPE, memory_type); + } + } +} + bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { if (n == nullptr) { GELOGE(FAILED, "Node is null."); @@ -771,9 +847,9 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { } MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, - MemoryType mem_type, const NodePtr &n, uint32_t out_index, + OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, - const bool continuous) { + const bool continuous, int64_t memory_type) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input parameter n is null."); auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); @@ -789,8 +865,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, 
is_op_reuse_mem && (IsPreReuse(n, out_index)); auto stream_id = node_op_desc->GetStreamId(); - if (is_reuse_memory && !continuous) { - for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { + if (is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty()) { + for (auto it = reusable_blocks_[memory_type][stream_id].begin(); + it != reusable_blocks_[memory_type][stream_id].end(); ++it) { MemoryBlock *reusable_block = *it; if (!IsPostReuse(reusable_block)) { reusable_block->reuse_mem_ = false; @@ -810,14 +887,14 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); - reusable_blocks_[stream_id].erase(it); + reusable_blocks_[memory_type][stream_id].erase(it); return reusable_block; } } } } - auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory); + auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); // Data and netoutput need zero copy block @@ -847,11 +924,13 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); MemoryBlock *block = nullptr; int64_t total_size = 0; + int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { return nullptr; } + int64_t size = 0; if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { GELOGI("Get size failed"); @@ -865,6 +944,18 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec if 
(index != 0) { zero_memory_list_.emplace_back(n, kOutput, index); } + + if (index == 0) { + NodeIndexIO node_index_io(n, index, kOut); + auto iter = anchor_to_symbol_.find(node_index_io.ToString()); + if (iter != anchor_to_symbol_.end()) { + string symbol = iter->second; + if (symbol_to_mem_type_.find(symbol) != symbol_to_mem_type_.end()) { + memory_type = symbol_to_mem_type_[symbol]; + GELOGD("Continuous out memory symbol is [%s], memory type is [%ld]", symbol.c_str(), memory_type); + } + } + } } auto block_size = GetBlockSize(total_size, ranges); @@ -872,7 +963,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec total_size, block_size); vector workspace_reuse_flag; - block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true); + block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true, + memory_type); if (block != nullptr) { // hccl task need align header and tail block->first_continuous_block_ = true; @@ -904,17 +996,23 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block->ref_count_++; } else { int64_t max_size = size; + int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); if (iter1 != anchor_to_symbol_.end()) { auto iter2 = symbol_size_.find(iter1->second); if (iter2 != symbol_size_.end()) { max_size = iter2->second; } + auto iter3 = symbol_to_mem_type_.find(iter1->second); + if (iter3 != symbol_to_mem_type_.end()) { + memory_type = iter3->second; + } } + auto block_size = GetBlockSize(max_size, ranges); vector workspace_reuse_flag; block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, - workspace_reuse_flag, is_op_reuse_mem, continuous); + workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); int 
out_count_reuse_input = block->ref_count_; @@ -1129,7 +1227,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); // Allocate memory for the current node and release node memory of the same size in the workspace GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", - ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id])); + for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); + ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); if (IsContinuousOutput(node)) { (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); return SUCCESS; @@ -1167,6 +1266,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector if (need_change) { is_op_reuse_mem_ = false; } + MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input); if (mem_block != nullptr) { node_out_blocks_[node->GetName()].emplace_back(mem_block); @@ -1206,46 +1306,54 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { return; } - - stream_workspace_blocks_[stream_id].clear(); + for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { + iter->second[stream_id].clear(); + } vector temp; GetNodeWorkSpaceSize(n, temp); vector workspace_bytes; - vector workspace_memory_type; - bool has_workspace_mem_type_attr = - ge::AttrUtils::GetListInt(node_op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); + vector tvm_workspace_memory_type; + bool has_tvm_workspace_mem_type_attr = + ge::AttrUtils::GetListInt(node_op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, tvm_workspace_memory_type); vector workspace_reuse_flag; GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag), 
GELOGD("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str())); GELOGI("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(), - temp.size(), workspace_memory_type.size()); + temp.size(), tvm_workspace_memory_type.size()); - if (has_workspace_mem_type_attr && (temp.size() != workspace_memory_type.size())) { - GELOGE(INTERNAL_ERROR, "fusion: node[%s], workspace_memory size err![v_temp:%zu, workspace:%zu]", - n->GetName().c_str(), temp.size(), workspace_memory_type.size()); + if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { + GELOGE(INTERNAL_ERROR, "fusion: node[%s], tvm workspace memory size error![v_temp:%zu, workspace:%zu]", + n->GetName().c_str(), temp.size(), tvm_workspace_memory_type.size()); return; } for (size_t i = 0; i < temp.size(); i++) { // fusion: other type's size not means malloc HBM memory bool workspace_skip_flag = false; - if (has_workspace_mem_type_attr && workspace_memory_type[i] == RT_MEMORY_L1) { + if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) { GELOGI( - "fusion: node[%s]workspace index[%d] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", - node_op_desc->GetName().c_str(), i, workspace_memory_type[i]); + "fusion: node[%s]workspace index[%d] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", + node_op_desc->GetName().c_str(), i, tvm_workspace_memory_type[i]); workspace_skip_flag = true; } if (temp[i] == 0 || workspace_skip_flag) { zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i), false); continue; } + int64_t memory_type = RT_MEMORY_HBM; + if (!GetWorkSpaceMemoryType(n, i, memory_type)) { + GELOGW("Get workspace memory type failed."); + return; + } MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast(temp[i]), ranges), static_cast(temp[i]), static_cast(temp[i]), kWorkspace, n, static_cast(i), workspace_reuse_flag, - is_op_reuse_mem_, 
false); + is_op_reuse_mem_, false, memory_type); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); - CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block); + CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); + } + for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { + ReleaseInputNodeOutMemory(node_out_blocks_, it->second[stream_id], n); } - ReleaseInputNodeOutMemory(node_out_blocks_, reusable_blocks_[stream_id], n); } GELOGD("Assigned memory blocks:"); @@ -1268,11 +1376,11 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { } void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block) { + MemoryBlock *mem_block, int64_t memory_type) { bool reuse_mem_flag = - ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; + ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? 
false : true; if (reuse_mem_flag) { - stream_workspace_blocks_[stream_id].emplace_back(mem_block); + stream_workspace_blocks_[memory_type][stream_id].emplace_back(mem_block); } } @@ -1481,16 +1589,28 @@ void BlockMemAssigner::ResizeMemoryBlocks() { if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { continue; } - if (memory_block->first_continuous_block_) { - mem_offset_ += MEM_ALIGN_SIZE; - } + if (memory_block->memory_type_ == RT_MEMORY_HBM) { + if (memory_block->first_continuous_block_) { + mem_offset_ += MEM_ALIGN_SIZE; + } + + memory_block->Resize(); + memory_block->SetHeadOffset(mem_offset_); + mem_offset_ += memory_block->Size(); + memory_block->SetTailOffset(mem_offset_ - 1); + } else { + if (memory_block->first_continuous_block_) { + p2p_mem_offset_ += MEM_ALIGN_SIZE; + } - memory_block->Resize(); - memory_block->SetHeadOffset(mem_offset_); - mem_offset_ += memory_block->Size(); - memory_block->SetTailOffset(mem_offset_ - 1); + memory_block->Resize(); + memory_block->SetHeadOffset(p2p_mem_offset_); + p2p_mem_offset_ += memory_block->Size(); + memory_block->SetTailOffset(p2p_mem_offset_ - 1); + } } - GELOGI("mem_offset_ exclude zero_copy_memory is %zu.", mem_offset_); + GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", + mem_offset_, p2p_mem_offset_); } /// @@ -1620,4 +1740,19 @@ bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || (node_type == HVDCALLBACKBROADCAST); } + +bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { + memory_type = RT_MEMORY_HBM; + vector workspace_memory_type; + auto op_desc = node->GetOpDesc(); + bool has_workspace_mem_type_attr = + ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); + if (has_workspace_mem_type_attr 
&& (workspace_memory_type.size() <= index)) { + GELOGE(INTERNAL_ERROR, "node[%s], workspace_memory size error![index:%zu, workspace:%zu]", + node->GetName().c_str(), index, workspace_memory_type.size()); + return false; + } + memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; + return true; +} } // namespace ge diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index d1a5e69d..14b91b93 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,14 +36,14 @@ const size_t kMaxLifeTime = 0xffffffff; using DependStreamLife = std::map>; -enum MemoryType { kOutput, kWorkspace }; +enum OpMemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index, bool ref_input = false) + NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} ge::NodePtr node = nullptr; - MemoryType mem_type = kOutput; + OpMemoryType mem_type = kOutput; uint32_t index = 0; size_t life_time_end = kMaxLifeTime; bool ref_input = false; @@ -59,7 +59,8 @@ struct NodeTypeIndex { class MemoryBlock { public: - explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true) + explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true, + int64_t memory_type = RT_MEMORY_HBM) : ref_count_(0), stream_id_(stream_id), deleted_block_(false), @@ -69,6 +70,7 @@ class MemoryBlock { first_continuous_block_(false), last_continuous_block_(false), is_zero_copy_(false), + memory_type_(memory_type), 
block_size_(block_size), head_offset_(0), tail_offset_(0), @@ -83,7 +85,7 @@ class MemoryBlock { symbol_list_.clear(); } - void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { + void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { real_size_list_.emplace_back(real_size); no_align_size_list_.emplace_back(no_align_size); node_type_index_list_.emplace_back(node, type, out_index, false); @@ -146,7 +148,7 @@ class MemoryBlock { bool last_continuous_block_; bool is_zero_copy_; std::map depend_stream_life_; - + int64_t memory_type_; private: size_t block_size_; std::vector real_size_list_; @@ -174,6 +176,8 @@ class BlockMemAssigner : public MemAssigner { size_t GetMemOffset() const { return mem_offset_; } + size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } std::vector GetMemoryBlocks() const { return memory_blocks_; } @@ -256,7 +260,26 @@ class BlockMemAssigner : public MemAssigner { /// void PrintSymbolMap(); + /// + /// @ingroup GE + /// @brief Get the memory type corresponding to the current symbol. + /// @param [in] node_index_io_list + /// @param [out] memory_type + /// @return void + /// + void GetSymbolMemType(std::list node_index_io_list, int64_t &memory_type); + + /// + /// @ingroup GE + /// @brief Update input tensor or output tensor of op to new memory type attr. 
+ /// @param [in] node_index_io_list + /// @param [in] memory_type + /// @return void + /// + void UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type); + size_t mem_offset_; + size_t p2p_mem_offset_; ge::ComputeGraphPtr compute_graph_; @@ -271,14 +294,17 @@ class BlockMemAssigner : public MemAssigner { std::map pre_reuse_flag_; std::map post_reuse_flag_; std::map symbol_size_; + std::map symbol_to_mem_type_; private: /// /// @ingroup GE /// @brief Traversing the compute_graph_ to apply for output memory while considering reuse - /// @param [in] n node in compute_graph_ - /// @param [in] index output node index - /// @param [in] ranges available memory specifications + /// @param [in] n: node in compute_graph_ + /// @param [in] index: output node index + /// @param [in] ranges: available memory specifications + /// @param [in] is_op_reuse_mem: Whether the op reuses the memory, true: reuse; false: not reuse + /// @param [in] continuous: Whether the op uses continuous memory /// @return MemoryBlock* /// @author /// @@ -295,12 +321,15 @@ class BlockMemAssigner : public MemAssigner { /// @param [in] n node in compute_graph_ /// @param [in] out_index output node index /// @param [in] workspace_reuse_flag reuse flag for workspace + /// @param [in] is_op_reuse_mem whether the op reuses memory + /// @param [in] continuous whether the memory of op is continuous + /// @param [in] memory_type device memory type /// @return MemoryBlock* /// @author /// - MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, MemoryType mem_type, + MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const ge::NodePtr &n, uint32_t out_index, const std::vector &workspace_reuse_flag, - const bool is_op_reuse_mem, const bool continuous); + const bool is_op_reuse_mem, const bool continuous, int64_t memory_type); /// /// @ingroup GE @@ -309,11 +338,12 @@ class BlockMemAssigner : public MemAssigner 
{ /// @param [in] index out index /// @param [in] stream_id which stream op in /// @param [in] mem_block node workspace mem_block + /// @param [in] memory_type workspace memory type /// @return void /// @author /// - void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, - int64_t stream_id, MemoryBlock *mem_block); + void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, + MemoryBlock *mem_block, int64_t memory_type); /// /// @ingroup GE @@ -375,13 +405,15 @@ class BlockMemAssigner : public MemAssigner { bool IsContinuousOutput(const NodePtr &n); + bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); + MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); - std::unordered_map> reusable_blocks_; + std::unordered_map>> reusable_blocks_; std::map reusable_block_counts_; - std::unordered_map> stream_workspace_blocks_; + std::unordered_map>> stream_workspace_blocks_; std::unordered_map> node_out_blocks_; diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index b5f415ed..a13b1e35 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -90,18 +90,23 @@ Status VariableMemoryAssigner::AssignVarAttr2Nodes() { } Status GraphMemoryAssigner::AssignMemory() { - ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_)); + ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); if (mem_assigner->Assign() != ge::SUCCESS) { GELOGE(ge::FAILED, "Memory assigner failed"); return ge::FAILED; } MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); - memory_offset_.push_back(memory_offset); + memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + + if (mem_assigner->GetP2PMemOffset() > 0) { + MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); + memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); + } auto session_id = compute_graph_->GetSessionID(); int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM); auto variable_assigner = - std::unique_ptr(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); + std::unique_ptr(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); if (variable_assigner == nullptr) { GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); return ge::FAILED; @@ -120,7 +125,7 @@ Status GraphMemoryAssigner::AssignMemory() { ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { auto variable_assigner = - std::unique_ptr(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); + std::unique_ptr(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); if (variable_assigner == nullptr) { GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); return ge::FAILED; @@ -198,7 +203,7 @@ Status GraphMemoryAssigner::GetMaxBatchLabel(const map> if (i == 0) { // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. 
- (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); max_shape_dims = input_output_desc->GetShape().GetDims(); } else { vector current_shape_dims = input_output_desc->GetShape().GetDims(); @@ -219,7 +224,7 @@ Status GraphMemoryAssigner::GetMaxBatchLabel(const map> if (current_shape_dims[j] > max_shape_dims[j]) { max_shape_dims[j] = current_shape_dims[j]; // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); } // Only compare the first different dim in shape. break; @@ -232,7 +237,7 @@ Status GraphMemoryAssigner::GetMaxBatchLabel(const map> return SUCCESS; } -Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offset) { +Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { GELOGE(FAILED, "memory_offset_ is empty."); return ge::FAILED; @@ -248,24 +253,31 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offse GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!"); - mem_offset = memory_offset_[0].mem_offset_; + size_t total_mem_offset = 0; + for (auto pair : memory_offset_) { + mem_type_to_offset[pair.first] = pair.second.mem_offset_; + total_mem_offset += pair.second.mem_offset_; + } auto session_id = compute_graph_->GetSessionID(); - if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { - GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, + if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { + GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset, 
VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); - ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"size", "item", "maxsize"}, {std::to_string(mem_offset), - "featuremap", std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); + for (auto iter : mem_type_to_offset) { + ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"}, + {std::to_string(iter.first), std::to_string(iter.second), "featuremap", + std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); + } return ge::FAILED; } return SUCCESS; } -Status GraphMemoryAssigner::AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size) { +Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size) { BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;); - size_t mem_offset_tmp = mem_offset; + size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM]; // set offset for zero copy block for (auto &memory_block : priority_assigner->GetMemoryBlocks()) { @@ -273,18 +285,24 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(size_t &mem_offset, size_t &zer continue; } memory_block->Resize(); - memory_block->SetHeadOffset(mem_offset); - mem_offset += memory_block->Size(); - memory_block->SetTailOffset(mem_offset - 1); + memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]); + mem_offset[RT_MEMORY_HBM] += memory_block->Size(); + memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1); } - GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset); + GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset[RT_MEMORY_HBM]); // set offset for zero copy nodes priority_assigner->SetOpMemOffset(true); - zero_mem_copy_size = mem_offset - mem_offset_tmp; - memory_offset_[0].mem_offset_ = mem_offset; + zero_mem_copy_size = 
mem_offset[RT_MEMORY_HBM] - mem_offset_tmp; + auto iter = memory_offset_.find(RT_MEMORY_HBM); + if (iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[hbm]."); + return FAILED; + } + iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; - GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset, mem_offset_tmp, zero_mem_copy_size); + GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp, + zero_mem_copy_size); return SUCCESS; } @@ -297,13 +315,15 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { bool is_input_continuous = false; GE_CHECK_NOTNULL(node->GetOpDesc()); // If GetBool fail, is_input_continuous is false. - (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); // Assign continuous input memory if (is_input_continuous) { + int64_t memory_type = RT_MEMORY_HBM; + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; - ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size); + ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; @@ -312,7 +332,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Clean up atomic address, eg, hcom node vector input_indexes; // If GetListInt fail, input_indexes is empty. 
- (void)ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); + (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { // check whether there is an atomic conflict between the current node and the peer out node @@ -340,12 +360,12 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Get the reference type of the node, default is false bool is_ref = false; // If GetBool fail, is_ref is false. - (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); // Get the continuous output type of the node, default is false bool is_output_continuous = false; // If GetBool fail, is_output_continuous is false. - (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); // If the output is ref type and refers to the ref of an input, the name of the output // and the input are the same. 
Ge encounters ref type, finds matching relationship according @@ -358,17 +378,23 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } } } - - GELOGI("After reassign continuous memory, memoffset = %zu.", memory_offset_[0].mem_offset_); + for (auto pair : memory_offset_) { + GELOGI("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first, + pair.second.mem_offset_); + } return ge::SUCCESS; } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size) { + int64_t &continuous_mem_size, int64_t memory_type) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); - continuous_mem_start = memory_offset_[0].mem_offset_; bool continuous_input_alloc = false; - (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); + return FAILED; + } for (auto &in_data_anchor : node->GetAllInDataAnchors()) { auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); @@ -377,7 +403,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); bool is_peer_output_continuous = false; // If GetBool fail, is_peer_output_continuous is false. 
- (void)ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); + (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and // continuous output of the previous node is the same, we can support it. If size != 1, there may be @@ -388,17 +414,17 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, "Current node %s requires continuous input, while the previous node %s requires " "continuous output. There may be conflict between the two. This node is not supported now.", node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str()); - return PARAM_INVALID;); + return PARAM_INVALID;); bool is_peer_reference = false; // If GetBool fail, is_peer_reference is false. - (void)AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); + (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); GE_IF_BOOL_EXEC(is_peer_reference, GELOGE(PARAM_INVALID, "Current node %s requires continuous input, while the previous node %s requires " "reference. There may be conflict between the two. 
This node is not supported now.", node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str()); - return PARAM_INVALID;); + return PARAM_INVALID;); vector output_list = peer_op_desc->GetOutputOffset(); std::vector offsets_for_fusion = {}; @@ -418,29 +444,30 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; continuous_mem_size = - output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; + output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; } GELOGI( - "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " - "real_size[%u].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), - 0, 0); + "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " + "real_size[%u].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), + 0, 0); continue; } - output_list.at(peer_out_data_anchor->GetIdx()) = memory_offset_[0].mem_offset_; + + output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; } else { GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); return FAILED; } peer_op_desc->SetOutputOffset(output_list); - size_t pre_mem_offset = memory_offset_[0].mem_offset_; + size_t pre_mem_offset = iter->second.mem_offset_; int64_t tensor_desc_size = 0; if (has_offset_attr) { if (peer_out_data_anchor->GetIdx() < static_cast(offsets_for_fusion.size())) { auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; - 
memory_offset_[0].mem_offset_ += offset_for_fusion; + iter->second.mem_offset_ += offset_for_fusion; } else { GELOGE(FAILED, "fusion: peer node %s index : %d is out of range.", peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx()); @@ -448,28 +475,28 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, } } else { Status ret = - TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); + TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - memory_offset_[0].mem_offset_ += tensor_desc_size; + iter->second.mem_offset_ += tensor_desc_size; } // If set tensor_actual_size, Memory alignment is not required. int32_t is_tensor_actual_size = 0; ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); if (is_tensor_actual_size == 0) { - AlignMemOffset(MEM_ALIGN_SIZE); + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); } GELOGI( - "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(), - pre_mem_offset, peer_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size); + "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " + "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), + (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); } - memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; + iter->second.mem_offset_ += MEM_ALIGN_SIZE; if (!continuous_input_alloc) { - continuous_mem_size = memory_offset_[0].mem_offset_ - 
continuous_mem_start; + continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; } return SUCCESS; } @@ -504,7 +531,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()] , out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); + output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); } out_op_desc->SetOutputOffset(output_list); return ge::SUCCESS; @@ -574,6 +601,7 @@ Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { map> mem_reuse_virtual_input_nodes_map; + int64_t memory_type = RT_MEMORY_HBM; for (const auto &n : compute_graph_->GetAllNodes()) { OpDescPtr op_desc = n->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -583,7 +611,6 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { bool attr_reuse = false; bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); GE_IF_BOOL_EXEC(!get_reuse_flag, continue); - if (attr_reuse && attr_continuous) { if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) { // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse. 
@@ -591,13 +618,19 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { op_desc->GetOutputsSize()); return FAILED; } - - GELOGD("Start to reassign memory for virtual input node, memory offset = %zu.", memory_offset_[0].mem_offset_); + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); + return FAILED; + } + GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); string batch_label_string; // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); if (batch_label_string.empty()) { - size_t node_mem_offset = memory_offset_[0].mem_offset_; + size_t node_mem_offset = iter->second.mem_offset_; // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset); if (status != SUCCESS) { @@ -605,9 +638,10 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { return FAILED; } - memory_offset_[0].mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE); - GELOGD("After reassign memory for virtual input node, align memory = %zu.", memory_offset_[0].mem_offset_); + iter->second.mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); } else { // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
string current_node_full_name = op_desc->GetName(); @@ -619,7 +653,7 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { } string fixed_name = current_node_full_name.substr(0, pos); vector parallel_virtual_input_nodes; - if(mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { + if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name]; } parallel_virtual_input_nodes.emplace_back(n); @@ -707,6 +741,7 @@ Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { map> mem_reuse_virtual_output_nodes_map; + int64_t memory_type = RT_MEMORY_HBM; for (const auto &n : compute_graph_->GetAllNodes()) { OpDescPtr op_desc = n->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -725,22 +760,29 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { in_data_anchor_list.size()); return FAILED; } - - GELOGD("Start to reassign memory for virtual output node, memory offset = %zu.", memory_offset_[0].mem_offset_); + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); + return FAILED; + } + GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); string batch_label_string; // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); if (batch_label_string.empty()) { - size_t node_mem_offset = memory_offset_[0].mem_offset_; + size_t node_mem_offset = 
iter->second.mem_offset_; // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset); if (status != SUCCESS) { GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str()); return FAILED; } - memory_offset_[0].mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE); - GELOGD("After reassign memory for virtual output node, align memory = %zu.", memory_offset_[0].mem_offset_); + iter->second.mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); } else { // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. string current_node_full_name = op_desc->GetName(); @@ -752,7 +794,7 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { } string fixed_name = current_node_full_name.substr(0, pos); vector parallel_virtual_output_nodes; - if(mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { + if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name]; } parallel_virtual_output_nodes.emplace_back(n); @@ -773,26 +815,29 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map nodes_mem_offset_list; for (auto &i_map : mem_reuse_nodes_map) { - size_t max_batch_node_mem_offset = memory_offset_[0].mem_offset_; - nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); - vector virtual_nodes_list = i_map.second; + int64_t memory_type = RT_MEMORY_HBM; + GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type), + "Get node list memory type failed."); + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); + return 
FAILED; + } + size_t max_batch_node_mem_offset = iter->second.mem_offset_; + nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); for (auto &i_node : virtual_nodes_list) { // Op_desc is not nullptr, it has been checked. OpDescPtr op_desc = i_node->GetOpDesc(); string batch_label_string; // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); if (batch_label_string == max_batch_label) { Status status = SUCCESS; if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { @@ -808,18 +853,16 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(mapGetName().c_str()); return FAILED; } - memory_offset_[0].mem_offset_ = max_batch_node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE); - GELOGD("After reassign memory for virtual node, align memory = %zu.", memory_offset_[0].mem_offset_); + iter->second.mem_offset_ = max_batch_node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.", + iter->second.mem_offset_, memory_type); // Only assign memory of max batch nodes. break; } } } - - // Assign memory of remaining nodes that have the same fixed_name. 
- GELOGD("Start to reassign memory for remaining batch virtual nodes, memory offset = %zu.", - memory_offset_[0].mem_offset_); + PrintMemoryOffset(); size_t memory_reuse_index = 0; for (auto &i_map : mem_reuse_nodes_map) { vector virtual_nodes_list = i_map.second; @@ -854,8 +897,14 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { return status; } + auto mem_iter = memory_offset_.find(RT_MEMORY_HBM); + if (mem_iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); + return FAILED; + } + for (auto &iter : normal_atomic_and_clean_nodes_map) { - int64_t atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); + int64_t atomic_mem_start = static_cast(mem_iter->second.mem_offset_); GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start); for (auto &atomic_node : iter.second) { @@ -868,11 +917,10 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { } } - int64_t atomic_mem_size = static_cast(memory_offset_[0].mem_offset_) - atomic_mem_start; - status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}); - if (status != SUCCESS) { - GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); - return status; + int64_t atomic_mem_size = static_cast(mem_iter->second.mem_offset_) - atomic_mem_start; + if (atomic_mem_size != 0) { + GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}), + "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); } } @@ -949,7 +997,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP if (!atomic_workspace_info.empty()) { bool is_fusion_node = false; // If GetBool fail, is_fusion_node is false. 
- (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); + (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); if (is_fusion_node) { // Assign fusion atomic node workspace memory @@ -970,6 +1018,11 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP } Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes) { + auto iter = memory_offset_.find(RT_MEMORY_HBM); + if (iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); + return FAILED; + } for (auto &node : connect_netoutput_nodes) { GE_CHECK_NOTNULL(node); if (node->GetOpDesc() == nullptr) { @@ -978,7 +1031,7 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector & } // Atomic memory start addr - int64_t original_atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); + int64_t original_atomic_mem_start = static_cast(iter->second.mem_offset_); GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.", node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); vector mem_offset_end; @@ -1001,7 +1054,7 @@ Status GraphMemoryAssigner::AssignReferenceMemory() { // Get the reference type of the node, default is false bool is_ref = false; // If GetBool fail, is_ref is false. - (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); if (!is_ref) { continue; } @@ -1085,7 +1138,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve vector atomic_output_index; // If GetListInt fail, atomic_output_index is empty. 
- (void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); + (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); // Check atomic output vector output_list = op_desc->GetOutputOffset(); @@ -1094,6 +1147,11 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve return ge::FAILED; } auto output_list_size = static_cast(output_list.size()); + auto iter = memory_offset_.find(RT_MEMORY_HBM); + if (iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); + return FAILED; + } for (auto &output_index : atomic_output_index) { if (output_index >= output_list_size) { GELOGE(ge::PARAM_INVALID, "The output index %ld is more than the size %ld of output_list.", output_index, @@ -1111,9 +1169,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve // If you have already assigned an atomic address, skip it, and you don't need to reassign it. 
if (is_assigned_mem) { GELOGI( - "Node %s atomic output : we have assigned atomic memory as the input of next node in " - "ReAssignContinuousMemory function.", - op_desc->GetName().c_str()); + "Node %s atomic output : we have assigned atomic memory as the input of next node in " + "ReAssignContinuousMemory function.", + op_desc->GetName().c_str()); continue; } @@ -1123,14 +1181,14 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve GELOGI("Get size failed"); } - output_list[output_index] = memory_offset_[0].mem_offset_; + output_list[output_index] = iter->second.mem_offset_; GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, memory_offset_[0].mem_offset_, - op_desc->GetStreamId(), size, size); + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, + iter->second.mem_offset_, op_desc->GetStreamId(), size, size); - memory_offset_[0].mem_offset_ += size; - AlignMemOffset(MEM_ALIGN_SIZE); - mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); + iter->second.mem_offset_ += size; + AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); + mem_offset_end.emplace_back(iter->second.mem_offset_); } op_desc->SetOutputOffset(output_list); @@ -1153,7 +1211,7 @@ Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, i /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address /// has been assigned vector atomic_input_index; - (void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); + (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { is_mem_assigned = true; break; @@ -1166,6 +1224,11 @@ Status 
GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc map> &workspace_info, vector &mem_offset_end) { GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); + auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); + if (mem_type_iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); + return FAILED; + } vector workspace_vector = op_desc->GetWorkspace(); for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) { @@ -1188,15 +1251,15 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc return ge::PARAM_INVALID; } - workspace_vector[workspace_index] = memory_offset_[0].mem_offset_; + workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_; GELOGI( - "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "size[%ld] real_size[%ld].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_, - op_desc->GetStreamId(), workspace_size, workspace_size); + "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "size[%ld] real_size[%ld].", + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size); - memory_offset_[0].mem_offset_ += workspace_size; - mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); + mem_type_iter->second.mem_offset_ += workspace_size; + mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); } } op_desc->SetWorkspace(workspace_vector); @@ -1208,6 +1271,11 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt map> &workspace_info, vector &mem_offset_end) { GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); + auto mem_type_iter = 
memory_offset_.find(RT_MEMORY_HBM); + if (mem_type_iter == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); + return FAILED; + } map> sub_node_workspace_offset; for (auto &iter : workspace_info) { @@ -1220,15 +1288,14 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt auto workspace_index = static_cast(info_iter.first); auto workspace_size = info_iter.second; - size_t workspace_offset = memory_offset_[0].mem_offset_; + size_t workspace_offset = mem_type_iter->second.mem_offset_; GELOGI( - "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_, - op_desc->GetStreamId(), workspace_size, workspace_size); + "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size); - memory_offset_[0].mem_offset_ += workspace_size; - mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); + mem_type_iter->second.mem_offset_ += workspace_size; + mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); index_offset.insert(std::make_pair(workspace_index, workspace_offset)); } sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); @@ -1293,8 +1360,11 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { GELOGE(FAILED, "memory_offset_ is empty."); return FAILED; } - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu]", compute_graph_->GetName().c_str(), - memory_offset_[0].mem_offset_); + for (auto pair : memory_offset_) { + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(), + 
pair.second.mem_offset_, pair.first); + } + for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { if (UpdateOpInputOffset(node) != ge::SUCCESS) { GELOGE(ge::FAILED, "Update op input offset failed"); @@ -1345,7 +1415,7 @@ ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vecto return FAILED; } - input_list = { parent_inputs[parent_index] }; + input_list = {parent_inputs[parent_index]}; node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input. return SUCCESS; } @@ -1372,7 +1442,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< auto out_index = static_cast(peer_out_anchor->GetIdx()); if (output_list.size() > static_cast(out_index)) { int64_t input_offset = output_list.at(out_index); - if (has_mem_type_attr) { + if (has_mem_type_attr && !origin_input_list.empty()) { auto input_size = tmp_op_desc->GetInputsSize(); auto ori_input_offset_list_size = origin_input_list.size(); auto mem_type_size = memory_type.size(); @@ -1471,7 +1541,6 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in memory_offset_size.emplace_back(size); } memory_offset_start.pop_back(); - const auto &in_control_anchor = node->GetInControlAnchor(); if (!memory_offset_size.empty() && in_control_anchor != nullptr) { for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { @@ -1544,11 +1613,93 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve return SUCCESS; } -void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size) { +void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) { if (mem_align_size <= 0) { return; } - memory_offset_[0].mem_offset_ = - (memory_offset_[0].mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size; + auto iter = memory_offset_.find(memory_type); + if (iter == memory_offset_.end()) { + GELOGW("Memory offset don't have memory 
type[%ld].", memory_type); + return; + } + iter->second.mem_offset_ = + (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size; +} + +ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector &nodes, int32_t mem_reuse_model, + int64_t &memory_type) { + memory_type = RT_MEMORY_HBM; + // In the dynamic batch scenario, the memory attributes of nodes are the same. + for (auto &n : nodes) { + if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.") + break; + } + + if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { + GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); + break; + } + } + return SUCCESS; +} + +ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) { + memory_type = RT_MEMORY_HBM; + vector mem_type_list; + if (input_or_output == "input") { + (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list); + } + if (input_or_output == "output") { + (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list); + } + if (mem_type_list.empty()) { + if (memory_offset_.find(memory_type) == memory_offset_.end()) { + GELOGE(FAILED, "Memory offset map does not have memory type[%ld].", memory_type); + return FAILED; + } + return SUCCESS; + } + + if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) { + GELOGE(FAILED, "The size[%zu] of mem type list is not equal to the size of in data anchor[%u].", + mem_type_list.size(), node->GetAllInDataAnchorsSize()); + return FAILED; + } + + if (!CheckContinuousMemType(mem_type_list)) { + GELOGE(FAILED, "Check continuous memory type failed."); + return FAILED; + } + // It is continuous memory and memory type is the same, so use the first memory. 
+ memory_type = mem_type_list[0]; + return SUCCESS; +} + +bool GraphMemoryAssigner::CheckContinuousMemType(vector mem_type_list) { + if (mem_type_list.size() == 0) { + return true; + } + int64_t mem_type_tmp = mem_type_list[0]; + for (auto mem_type : mem_type_list) { + if (mem_type != mem_type_tmp) { + GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].", + mem_type_tmp, mem_type); + return false; + } + } + if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) { + GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp); + return false; + } + return true; +} + +void GraphMemoryAssigner::PrintMemoryOffset() { + for (auto pair : memory_offset_) { + // Assign memory of max batch nodes that have the same batch label. + GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.", + pair.first, pair.second.mem_offset_); + } } } // namespace ge diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index 3864a967..b12d8b4c 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,7 +37,7 @@ struct MemoryOffset { size_t mem_offset_; }; -using MemoryOffsetList = vector; +using MemoryOffsetMap = std::map; class VariableMemoryAssigner { public: @@ -99,9 +99,9 @@ class GraphMemoryAssigner { /// ge::Status AssignVarAttr2Nodes(); - ge::Status ReAssignMemory(bool is_loop_graph, size_t &mem_offset); + ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); - ge::Status AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size); + ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); ge::Status SetInputOffset(); @@ -136,12 +136,12 @@ class GraphMemoryAssigner { int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); ge::Status ReAssignAtomicMemory(bool is_loop_graph); - + ge::Status FilterAtomicNodesForMemoryAssign(std::map> &normal_atomic_nodes_map, std::vector &connecting_output_atomic_nodes); - ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, - int64_t &continuous_mem_start, int64_t &continuous_mem_size); + ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, + int64_t &continuous_mem_size, int64_t memory_type); ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); @@ -176,7 +176,7 @@ class GraphMemoryAssigner { ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); - void AlignMemOffset(const int64_t &mem_align_size); + void AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type); ge::Status UpdateOpInputOffset(const NodePtr &node, vector &input_list) const; @@ -184,7 +184,14 @@ class GraphMemoryAssigner { NodePtr GetKnownInputNode(const NodePtr &node) const; - MemoryOffsetList memory_offset_; + ge::Status GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output); + ge::Status GetNodeListMemoryType(const vector &nodes, int32_t mem_reuse_model, int64_t &memory_type); + + bool CheckContinuousMemType(vector mem_type_list); + + 
void PrintMemoryOffset(); + + MemoryOffsetMap memory_offset_; ge::ComputeGraphPtr compute_graph_; HybridMemAssignerPtr mem_assigner_; }; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.cc b/ge/graph/build/memory/hybrid_mem_assigner.cc index 6538b0f2..18a9a5a5 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.cc +++ b/ge/graph/build/memory/hybrid_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ namespace ge { HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) - : mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} + : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size) { vector ranges; @@ -73,6 +73,7 @@ Status HybridMemAssigner::Assign() { priority_assigner->SetOpMemOffset(false); mem_offset_ = priority_assigner->GetMemOffset(); + p2p_mem_offset_ = priority_assigner->GetP2PMemOffset(); priority_assigner_ = std::move(priority_assigner); return SUCCESS; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.h b/ge/graph/build/memory/hybrid_mem_assigner.h index 6673c0ef..3913fea1 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.h +++ b/ge/graph/build/memory/hybrid_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,6 +43,7 @@ class HybridMemAssigner : public MemAssigner { Status Assign() override; size_t GetMemOffset() const { return mem_offset_; } + size_t GetP2PMemOffset() const { return p2p_mem_offset_; } BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } @@ -50,6 +51,7 @@ class HybridMemAssigner : public MemAssigner { Status AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size); size_t mem_offset_; + size_t p2p_mem_offset_; ge::ComputeGraphPtr compute_graph_; diff --git a/ge/graph/build/memory/max_block_mem_assigner.cc b/ge/graph/build/memory/max_block_mem_assigner.cc index 15edae3d..db6befeb 100644 --- a/ge/graph/build/memory/max_block_mem_assigner.cc +++ b/ge/graph/build/memory/max_block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/max_block_mem_assigner.h b/ge/graph/build/memory/max_block_mem_assigner.h index c4d67953..f5626ebf 100644 --- a/ge/graph/build/memory/max_block_mem_assigner.h +++ b/ge/graph/build/memory/max_block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/mem_assigner.h b/ge/graph/build/memory/mem_assigner.h index 7d0252d9..b1cb4627 100755 --- a/ge/graph/build/memory/mem_assigner.h +++ b/ge/graph/build/memory/mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 91051edc..271d5633 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ #include "graph/build/memory/graph_mem_assigner.h" namespace ge { -Status MemoryAssigner::AssignMemory(bool is_loop_graph, size_t &mem_offset, size_t &zero_copy_mem_size) { +Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { GraphMemoryAssigner graph_mem_assigner(compute_graph_); if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 639bfaa0..ad9e1cd4 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/var_mem_assign_util.h b/ge/graph/build/memory/var_mem_assign_util.h index f0e6270d..b34e3646 100644 --- a/ge/graph/build/memory/var_mem_assign_util.h +++ b/ge/graph/build/memory/var_mem_assign_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 9c2e4836..e7e59ac8 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/build/model_builder.h" #include #include @@ -92,7 +93,6 @@ ModelBuilder::ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr compute_grap const Graph2SubGraphInfoList &subgraphs, const map &stream_max_parallel_num, bool hcom_parallel, int mode) : session_id_(session_id), - mem_offset_(0), weight_offset_(kWeightsStartOffset), compute_graph_(std::move(compute_graph)), subgraphs_(subgraphs), @@ -103,6 +103,7 @@ ModelBuilder::ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr compute_grap hcom_parallel_(hcom_parallel), build_mode_(mode), max_mem_offset_(0), + p2p_mem_offset_(0), zero_copy_mem_size_(0), platform_type_(0), is_loop_graph_(false), @@ -385,10 +386,16 @@ void ModelBuilder::InitL1FusionOption() { Status ModelBuilder::BuildModelDef(ge::Model &model) { ClearOriginalFormat(); - max_mem_offset_ = mem_offset_; + max_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_HBM]; GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_MEMORY_SIZE, max_mem_offset_), GELOGE(FAILED, "SetInt of ATTR_MODEL_MEMORY_SIZE failed."); return FAILED); + if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { + p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; + } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), + GELOGE(FAILED, "SetInt of ATTR_MODEL_P2P_MEMORY_SIZE failed."); + return FAILED); 
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_WEIGHT_SIZE, weight_offset_), GELOGE(FAILED, "SetInt of ATTR_MODEL_WEIGHT_SIZE failed."); return FAILED); @@ -410,7 +417,8 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, ATTR_MODEL_OUT_NODES_NAME, GetLocalOmgContext().net_out_nodes), GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); return FAILED); - GELOGI("For model, max_mem_offset_: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, zero_copy_mem_size_); + GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, + p2p_mem_offset_, zero_copy_mem_size_); string ge_core_type; Status ret = ge::GetContext().GetOption(kCoreType, ge_core_type); @@ -711,7 +719,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { GE_TIMESTAMP_START(AssignMemory); MemoryAssigner mem_assigner(compute_graph_); - GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_offset_, zero_copy_mem_size_), + GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_type_to_mem_offset_, zero_copy_mem_size_), "Assign Memory Failed!"); GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory"); diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index 04827c30..b2f58f6e 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +85,7 @@ class ModelBuilder { uint64_t session_id_; - size_t mem_offset_; + map mem_type_to_mem_offset_; size_t weight_offset_; @@ -106,6 +106,7 @@ class ModelBuilder { int build_mode_; size_t max_mem_offset_; + size_t p2p_mem_offset_; size_t zero_copy_mem_size_; TBEKernelStore tbe_kernel_store_; diff --git a/ge/graph/build/run_context.cc b/ge/graph/build/run_context.cc index 10da061c..2c99c8f9 100644 --- a/ge/graph/build/run_context.cc +++ b/ge/graph/build/run_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/build/run_context.h" #include "common/util.h" @@ -22,7 +23,9 @@ namespace ge { RunContextUtil::~RunContextUtil() { DestroyRtModelResources(); } -Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, uint8_t *weight_mem_base, +Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, + std::map mem_type_to_data_mem_base, + std::map mem_type_to_data_mem_size, uint8_t *weight_mem_base, uint64_t weight_mem_size) { if ((data_mem_size > 0) && (data_mem_base == nullptr)) { GELOGE(PARAM_INVALID, "InitMemInfo param data_mem_base is null but data_mem_size = %lu.", data_mem_size); @@ -32,10 +35,20 @@ Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_siz GELOGE(PARAM_INVALID, "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu.", weight_mem_size); return PARAM_INVALID; } + if (mem_type_to_data_mem_base.empty() || mem_type_to_data_mem_size.empty() || + mem_type_to_data_mem_base.size() != mem_type_to_data_mem_size.size()) { + GELOGE(PARAM_INVALID, + "InitMemInfo param mem_type_to_data_mem_base size[%zu] is not equal 
to the size of " + "mem_type_to_data_mem_size[%zu].", + mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size()); + return PARAM_INVALID; + } data_mem_base_ = data_mem_base; data_mem_size_ = data_mem_size; weight_mem_base_ = weight_mem_base; weight_mem_size_ = weight_mem_size; + mem_type_to_data_mem_base_ = mem_type_to_data_mem_base; + mem_type_to_data_mem_size_ = mem_type_to_data_mem_size; return SUCCESS; } @@ -166,8 +179,26 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra GELOGI("CreateRunContext: data_mem_base_ = %p, weight_mem_base_ = %p, memory_size = %lu, weight_size = %lu", data_mem_base_, weight_mem_base_, data_mem_size_, weight_mem_size_); - run_context_ = {rt_model_, nullptr, session_id, data_mem_size_, data_mem_base_, - weight_mem_size_, weight_mem_base_, buffer, stream_list_, event_list_, + for (auto iter : mem_type_to_data_mem_base_) { + GELOGI("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second); + } + + for (auto iter : mem_type_to_data_mem_size_) { + GELOGI("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second); + } + + run_context_ = {rt_model_, + nullptr, + session_id, + data_mem_size_, + data_mem_base_, + mem_type_to_data_mem_size_, + mem_type_to_data_mem_base_, + weight_mem_size_, + weight_mem_base_, + buffer, + stream_list_, + event_list_, label_list_}; return SUCCESS; } diff --git a/ge/graph/build/run_context.h b/ge/graph/build/run_context.h index 0190f134..a85a281d 100755 --- a/ge/graph/build/run_context.h +++ b/ge/graph/build/run_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,8 +33,10 @@ class RunContextUtil { virtual ~RunContextUtil(); // Init mem info. 
- ge::Status InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, uint8_t *weight_mem_base, - uint64_t weight_mem_size); + ge::Status InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, + std::map mem_type_to_data_mem_base, + std::map mem_type_to_data_mem_size, + uint8_t *weight_mem_base, uint64_t weight_mem_size); ge::Status CreateRunContext(Model &model_def, const ComputeGraphPtr &graph, Buffer &buffer, const uint64_t session_id); @@ -61,6 +63,8 @@ class RunContextUtil { uint64_t data_mem_size_ = 0; uint8_t *weight_mem_base_ = nullptr; uint64_t weight_mem_size_ = 0; + std::map mem_type_to_data_mem_base_; + std::map mem_type_to_data_mem_size_; }; } // namespace ge #endif // GE_GRAPH_BUILD_RUN_CONTEXT_H_ diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 3aba8fd1..9ee2903e 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/stream_allocator.h b/ge/graph/build/stream_allocator.h index a21b2f77..0158e6b0 100644 --- a/ge/graph/build/stream_allocator.h +++ b/ge/graph/build/stream_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index 21625a1e..49ecc674 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "stream_graph_optimizer.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/graph/build/stream_graph_optimizer.h b/ge/graph/build/stream_graph_optimizer.h index b0eea135..3133d32d 100644 --- a/ge/graph/build/stream_graph_optimizer.h +++ b/ge/graph/build/stream_graph_optimizer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 225ddb88..372be819 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h index c93b2007..b976e569 100755 --- a/ge/graph/build/task_generator.h +++ b/ge/graph/build/task_generator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/common/ge_call_wrapper.h b/ge/graph/common/ge_call_wrapper.h index 55a93951..5e73532f 100644 --- a/ge/graph/common/ge_call_wrapper.h +++ b/ge/graph/common/ge_call_wrapper.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,12 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GE_CALL_WRAPPER_H_ #define GE_GE_CALL_WRAPPER_H_ #include "framework/common/debug/ge_log.h" -/*lint --emacro((773),GE_TIMESTAMP_START)*/ -/*lint -esym(773,GE_TIMESTAMP_START)*/ #define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestamp() #define GE_TIMESTAMP_END(stage, stage_name) \ diff --git a/ge/graph/common/local_context.cc b/ge/graph/common/local_context.cc index d3e66861..d302de28 100644 --- a/ge/graph/common/local_context.cc +++ b/ge/graph/common/local_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/common/local_context.h b/ge/graph/common/local_context.h index 83367766..1cdd2ca1 100644 --- a/ge/graph/common/local_context.h +++ b/ge/graph/common/local_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 052d20a0..11f4de71 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h index efc30743..242103f8 100755 --- a/ge/graph/execute/graph_execute.h +++ b/ge/graph/execute/graph_execute.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/case_label_maker.cc b/ge/graph/label/case_label_maker.cc index ea4b2a03..88b7ee8b 100644 --- a/ge/graph/label/case_label_maker.cc +++ b/ge/graph/label/case_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "case_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/case_label_maker.h b/ge/graph/label/case_label_maker.h index 1078a906..2e3b584b 100644 --- a/ge/graph/label/case_label_maker.h +++ b/ge/graph/label/case_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/if_label_maker.cc b/ge/graph/label/if_label_maker.cc index d07f7984..62722e7c 100644 --- a/ge/graph/label/if_label_maker.cc +++ b/ge/graph/label/if_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "if_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/if_label_maker.h b/ge/graph/label/if_label_maker.h index 0807f549..9ffe8fca 100644 --- a/ge/graph/label/if_label_maker.h +++ b/ge/graph/label/if_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/label/label_maker.cc b/ge/graph/label/label_maker.cc index 3f643fb2..88b90199 100644 --- a/ge/graph/label/label_maker.cc +++ b/ge/graph/label/label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/label_maker.h b/ge/graph/label/label_maker.h index 847c7904..759bf5cf 100644 --- a/ge/graph/label/label_maker.h +++ b/ge/graph/label/label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/label_maker_factory.h b/ge/graph/label/label_maker_factory.h index e0a48c6b..6bfc1e33 100644 --- a/ge/graph/label/label_maker_factory.h +++ b/ge/graph/label/label_maker_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/partitioned_call_label_maker.cc b/ge/graph/label/partitioned_call_label_maker.cc index 0be738f0..39a317a3 100644 --- a/ge/graph/label/partitioned_call_label_maker.cc +++ b/ge/graph/label/partitioned_call_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "partitioned_call_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/partitioned_call_label_maker.h b/ge/graph/label/partitioned_call_label_maker.h index b89cb94c..1c0f0890 100644 --- a/ge/graph/label/partitioned_call_label_maker.h +++ b/ge/graph/label/partitioned_call_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/while_label_maker.cc b/ge/graph/label/while_label_maker.cc index 83aad7c9..3f5b3863 100644 --- a/ge/graph/label/while_label_maker.cc +++ b/ge/graph/label/while_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "while_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/while_label_maker.h b/ge/graph/label/while_label_maker.h index 0eb0deee..42e6a490 100644 --- a/ge/graph/label/while_label_maker.h +++ b/ge/graph/label/while_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index cffd07e5..554bd461 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index b581f2fa..c887c06b 100755 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/new_model_manager/aipp_utils.cc index e0e60d2b..67d67771 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/new_model_manager/aipp_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/new_model_manager/aipp_utils.h index 78107f3e..2534b9fb 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.h +++ b/ge/graph/load/new_model_manager/aipp_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/new_model_manager/cpu_queue_schedule.cc index 7f406985..01e1cfa8 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/new_model_manager/cpu_queue_schedule.h index 8999e975..fcbb4993 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_ diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index c6100129..b6833317 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h index 17cb16f8..8aa94b3a 100755 --- a/ge/graph/load/new_model_manager/data_dumper.h +++ b/ge/graph/load/new_model_manager/data_dumper.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,10 +36,10 @@ namespace ge { class DataDumper { public: - DataDumper() + explicit DataDumper(const RuntimeParam &rsh) : model_name_(), model_id_(0), - runtime_param_(), + runtime_param_(rsh), dev_mem_load_(nullptr), dev_mem_unload_(nullptr), op_list_(), @@ -58,8 +58,6 @@ class DataDumper { void SetModelId(uint32_t model_id) { model_id_ = model_id; } - void SetMemory(const RuntimeParam &runtime_param) { runtime_param_ = runtime_param; } - void SetDeviceId(uint32_t device_id) { device_id_ = device_id; } void SetComputeGraph(const ComputeGraphPtr &compute_graph) { compute_graph_ = compute_graph; }; @@ -105,7 +103,7 @@ class DataDumper { std::string om_name_; uint32_t model_id_; - RuntimeParam runtime_param_; + const RuntimeParam &runtime_param_; void *dev_mem_load_; void *dev_mem_unload_; diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/new_model_manager/data_inputer.cc index 5efc710e..594a7bcd 100755 --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/new_model_manager/data_inputer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/new_model_manager/data_inputer.h index 14ebcea5..cc511c36 100755 --- a/ge/graph/load/new_model_manager/data_inputer.h +++ b/ge/graph/load/new_model_manager/data_inputer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 85ef4d83..ddc15b81 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -42,8 +42,8 @@ #include "graph/ge_context.h" #include "graph/graph.h" #include "graph/load/new_model_manager/cpu_queue_schedule.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" #include "graph/load/new_model_manager/model_manager.h" +#include "graph/load/new_model_manager/tbe_handle_store.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -107,6 +107,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrGetWeight(); std::size_t weights_size = weights.GetSize(); GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE); @@ -282,6 +285,7 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p } mem_base_ = static_cast(dev_ptr); + p2p_mem_base_ = static_cast(dev_ptr); weights_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; is_inner_weight_base_ = false; @@ -294,13 +298,23 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p } GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); - weights_mem_base_ = mem_base_; is_inner_mem_base_ = true; 
is_inner_weight_base_ = true; } + if (p2p_data_size != 0) { + p2p_mem_base_ = MallocP2PMem(p2p_data_size); + if (p2p_mem_base_ == nullptr) { + GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); + return GE_EXEC_ALLOC_P2P_MEM_FAILED; + } + GELOGI("InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, + p2p_mem_base_, p2p_data_size); + is_inner_p2p_mem_base_ = true; + } + if (weights_size != 0) { weights_mem_base_ = static_cast(weight_ptr); is_inner_weight_base_ = false; @@ -321,6 +335,7 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; + runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; return SUCCESS; } @@ -344,6 +359,7 @@ Status DavinciModel::InitVariableMem() { void DavinciModel::InitRuntimeParams() { int64_t value = 0; bool ret; + MemInfo p2p_mem_info; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); runtime_param_.mem_size = ret ? (uint64_t)value : 0; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); @@ -367,6 +383,9 @@ void DavinciModel::InitRuntimeParams() { ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_VAR_SIZE, value); runtime_param_.var_size = ret ? (uint64_t)value : 0; session_id_ = runtime_param_.session_id; + ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); + p2p_mem_info.memory_size = ret ? 
(uint64_t)value : 0; + runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); GELOGI( "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " @@ -519,6 +538,7 @@ void DavinciModel::OpDebugUnRegister() { debug_reg_mutex_.unlock(); rtError_t rt_ret = RT_ERROR_NONE; if (rt_model_handle_ != nullptr) { + GELOGD("start call debug_unregister."); rt_ret = rtDebugUnRegister(rt_model_handle_); if (rt_ret != RT_ERROR_NONE) { GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret); @@ -603,11 +623,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size // create model_handle to load model GE_CHK_RT_RET(rtModelCreate(&rt_model_handle_, 0)); GE_CHK_RT_RET(rtModelGetId(rt_model_handle_, &runtime_model_id_)); - // malloc 2M for dump l1fusion op - GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); - - // send l1fusion dump addr to rts - GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); // inference will use default graph_id 0; runtime_param_.graph_id = compute_graph->GetGraphID(); @@ -657,6 +672,17 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size auto ret = DoTaskSink(); GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); + auto all_dump_model = GetDumpProperties().GetAllDumpModel(); + bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); + bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); + if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { + // malloc 2M for dump l1fusion op + GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); + + // send l1fusion dump addr to rts + GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); + } + /// In zero copy model, if a aicpu operator is connected to 
the first or last layer, before model execution, /// the aicpu opertor needs to destroy history record, and update operator memory address. /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). @@ -1132,7 +1158,7 @@ Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) { Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { string batch_label; if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. + return SUCCESS; // Not Multi-batch. } const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex); @@ -1145,8 +1171,8 @@ Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; - GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", - op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); + GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(), + op_desc->GetId(), batch_label.c_str()); } } @@ -1162,7 +1188,7 @@ Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) { string batch_label; if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. + return SUCCESS; // Not Multi-batch. 
} for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { @@ -1638,7 +1664,7 @@ Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &ai GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); // Set default value type = DATA_WITHOUT_AIPP; - aipp_index = 0xFFFFFFFF; // default invalid value + aipp_index = 0xFFFFFFFF; // default invalid value OpDescPtr data_op = data_op_list_[index]; GE_CHECK_NOTNULL(data_op); if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { @@ -1816,7 +1842,7 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD uint32_t &format_result) { /// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); - return); + return ); Format format = op_desc->GetInputDescPtr(index)->GetFormat(); GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType(); @@ -1989,13 +2015,7 @@ Status DavinciModel::SinkModelProfile() { name = name_; } size_t name_len = name.size(); - // phy device id - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED); - reporter_data.deviceId = phy_device_id; + reporter_data.deviceId = device_id_; reporter_data.data = (unsigned char *)&name_len; reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", @@ -2164,12 +2184,7 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); // device id - uint32_t phy_device_id = 
0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED); - reporter_data.deviceId = phy_device_id; + reporter_data.deviceId = device_id_; // Model Header string name; @@ -2750,17 +2765,15 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); if (total_args_size_ == 0) { - GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", - args_, total_args_size_); + GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); } else { uint32_t total_addr_size = total_io_addrs_.size() * sizeof(uint64_t); - GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, src size %u", - args_, total_args_size_, total_addr_size); + GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, src size %u", args_, total_args_size_, + total_addr_size); - Status rt_ret = rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), - total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) + Status rt_ret = + rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) } GELOGI("DavinciModel::UpdateKnownNodeArgs success"); @@ -2868,7 +2881,6 @@ Status DavinciModel::DistributeTask() { SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } - // get op_name by task_index if (task->GetCtx() != nullptr) { auto iter = op_name_map_.find(task_index); @@ -3632,6 +3644,19 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t 
data_size) { return mem_base; } +uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) { + uint8_t *p2p_mem_base = nullptr; + const string purpose("p2p memory, used for some op related to hcom"); + if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { + string p2p_memory_key = std::to_string(0) + "_p"; + p2p_mem_base = + MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId()); + } else { + p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId()); + } + return p2p_mem_base; +} + uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { uint8_t *weights_mem_base = nullptr; const string purpose("weights memory in inference network."); @@ -3661,6 +3686,22 @@ void DavinciModel::FreeFeatureMapMem() { } } +void DavinciModel::FreeP2PMem() { + if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { + std::string p2p_memory_key = std::to_string(0) + "_p"; + if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()), + "failed to free p2p memory"); + } + p2p_mem_base_ = nullptr; + } else { + GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_, + GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()), + "failed to free p2p memory"); + p2p_mem_base_ = nullptr); + } +} + void DavinciModel::FreeWeightsMem() { if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { string memory_key = std::to_string(0) + "_w"; @@ -3708,7 +3749,6 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { GELOGI("set data dumper args, name: %s, id: %u.", name_.c_str(), model_id_); data_dumper_.SetModelName(name_); data_dumper_.SetModelId(model_id_); - data_dumper_.SetMemory(runtime_param_); data_dumper_.SetOmName(om_name_); data_dumper_.SetComputeGraph(compute_graph); 
data_dumper_.SetRefInfo(saved_task_addrs_); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 5bdee9b5..713cb1da 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -189,6 +189,8 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } + const std::map &P2PMemInfos() const {return runtime_param_.memory_infos;} + // model name string Name() const { return name_; } @@ -410,6 +412,8 @@ class DavinciModel { void DisableZeroCopy(const void *addr); + bool GetOpDugReg() const { return is_op_debug_reg_; } + /// /// @ingroup ge /// @brief Save outside address of Data or NetOutput used info for ZeroCopy. 
@@ -500,11 +504,6 @@ class DavinciModel { void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } - void SetMemcpyOffsetAndAddr(map &memcpy_4g_offset_addr) { - memcpy_4g_offset_addr_.insert(memcpy_4g_offset_addr.begin(), memcpy_4g_offset_addr.end()); - } - const map &GetMemcpyOffsetAndAddr() const { return memcpy_4g_offset_addr_; } - bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); } @@ -516,8 +515,10 @@ class DavinciModel { uint8_t *var_mem_base_; // memory address of model uint8_t *mem_base_; + uint8_t *p2p_mem_base_; bool is_inner_mem_base_; bool is_inner_weight_base_; + bool is_inner_p2p_mem_base_; // input data manager DataInputer *data_inputer_; @@ -599,10 +600,14 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); + uint8_t* MallocP2PMem(size_t p2p_data_size); + void FreeFeatureMapMem(); void FreeWeightsMem(); + void FreeP2PMem(); + void ReleaseTask(); void UnbindTaskSinkStream(); @@ -988,8 +993,6 @@ class DavinciModel { void *op_debug_addr_ = nullptr; void *p2p_debug_addr_ = nullptr; bool is_new_model_desc_{false}; - - std::map memcpy_4g_offset_addr_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/new_model_manager/davinci_model_parser.cc index 34180d08..b744f907 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/new_model_manager/davinci_model_parser.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/new_model_manager/davinci_model_parser.h index 83eb4cc3..8907c97d 100755 --- a/ge/graph/load/new_model_manager/davinci_model_parser.h +++ b/ge/graph/load/new_model_manager/davinci_model_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index b7486a64..5b83d20d 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -236,7 +236,6 @@ ModelManager::~ModelManager() { std::lock_guard lock(map_mutex_); model_map_.clear(); model_aicpu_kernel_.clear(); - cust_aicpu_so_.clear(); GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); } @@ -400,6 +399,7 @@ Status ModelManager::Unload(uint32_t model_id) { } std::lock_guard lock(exeception_infos_mutex_); exception_infos_.clear(); + cust_aicpu_so_.clear(); return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index d4852a53..94b0b75a 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index 2ef1c42b..1848f283 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,14 +29,13 @@ #include "framework/common/debug/ge_log.h" #include "graph/manager/graph_var_manager.h" -#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ -do { \ - if (SIZE <= static_cast(OFFSET)) { \ - GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", \ - OP->GetName().c_str(), SIZE, OFFSET); \ - return {}; \ - } \ -} while (0) +#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ + do { \ + if (SIZE <= static_cast(OFFSET)) { \ + GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", OP->GetName().c_str(), SIZE, OFFSET); \ + return {}; \ + } \ + } while (0) namespace ge { /// @@ -47,10 +46,8 @@ namespace ge { vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { vector v_input_size; GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_size); - const size_t inputs_size = op_desc->GetAllInputsSize(); - const string op_type = op_desc->GetType(); - const vector v_is_input_const = op_desc->GetIsInputConst(); + const size_t inputs_size = op_desc->GetAllInputsSize(); for (size_t i = 0; i < inputs_size; ++i) { const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); if (tensor_desc == nullptr) { @@ -59,22 +56,12 @@ vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { } int64_t tensor_size = 0; - if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { - // TBE: add weights size to input - GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); - if (tensor_size) { - v_input_size.push_back(tensor_size); - } - 
GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); - continue; - } - - GE_IF_BOOL_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, - GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); - continue); - - GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); + GE_IF_BOOL_EXEC( + TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, + GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); + continue); + GELOGI("[IMAS]GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); v_input_size.push_back(tensor_size); } @@ -93,8 +80,8 @@ vector ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { const size_t outputs_size = op_desc->GetOutputsSize(); const vector v_output_offset = op_desc->GetOutputOffset(); GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size, - GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); - return v_output_size;); + GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); + return v_output_size;); for (size_t i = 0; i < outputs_size; ++i) { const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); @@ -104,10 +91,12 @@ vector ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { } int64_t tensor_size = 0; - GE_IF_BOOL_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, - GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); - continue); + GE_IF_BOOL_EXEC( + TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, + GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); + continue); + GELOGI("[IMAS]GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), 
i, tensor_size); v_output_size.push_back(tensor_size); } @@ -343,13 +332,21 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co VALIDATE_MEM_RANGE(op_desc, model_param.weight_size, data_offset); uint8_t *weight_addr = model_param.weight_base + data_offset; v_input_data_addr.push_back(weight_addr); - GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, weight_addr); + GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, + op_desc->GetName().c_str(), i, weight_addr); } non_const_index++; continue; } + int64_t mem_type; + bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); + if (tensor_has_mem_type) { + uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_input_offset[i]; + v_input_data_addr.push_back(p2p_mem_addr); + continue; + } + GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index); break); @@ -357,19 +354,24 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co int64_t input_offset = v_input_offset[non_const_index]; non_const_index++; GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; - v_input_data_addr.push_back(variable_addr); - GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); - continue); + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); + uint8_t *variable_addr = model_param.var_base + input_offset - 
model_param.logic_var_base; + v_input_data_addr.push_back(variable_addr); + GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); + continue); // feature maps - uint8_t *mem_addr = nullptr; - // fusion - if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { + void *mem_addr = nullptr; + if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion mem_addr = reinterpret_cast(reinterpret_cast(input_offset)); v_input_data_addr.push_back(mem_addr); + } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { + int64_t tensor_size = 0; + GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {}); + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); + mem_addr = model_param.ts_mem_mall->Acquire(input_offset, static_cast(tensor_size)); + v_input_data_addr.push_back(mem_addr); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); mem_addr = model_param.mem_base + input_offset; @@ -395,8 +397,8 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C const size_t outputs_size = op_desc->GetOutputsSize(); const vector v_output_offset = op_desc->GetOutputOffset(); GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size, - GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); - return v_output_data_addr); + GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); + return v_output_data_addr); vector v_memory_type; bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type); if (has_mem_type_attr && (v_memory_type.size() != outputs_size)) { @@ -407,18 +409,37 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C } for (size_t i = 0; i < outputs_size; ++i) { GE_IF_BOOL_EXEC(model_param.var_size != 0 && 
ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); - uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; - v_output_data_addr.push_back(variable_addr); - GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); - continue); + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); + uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; + v_output_data_addr.push_back(variable_addr); + GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); + continue); + const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); + if (tensor_desc == nullptr) { + GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); + continue; + } + int64_t mem_type; + bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); + if (tensor_has_mem_type) { + uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_output_offset[i]; + v_output_data_addr.push_back(p2p_mem_addr); + continue; + } // feature maps - uint8_t *mem_addr = nullptr; - // fusion - if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { + void *mem_addr = nullptr; + if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion mem_addr = reinterpret_cast(reinterpret_cast(v_output_offset[i])); v_output_data_addr.push_back(mem_addr); + } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { + const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); + GE_CHECK_NOTNULL_EXEC(tensor_desc, return {}); + int64_t tensor_size = 0; + 
GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {}); + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); + mem_addr = model_param.ts_mem_mall->Acquire(v_output_offset[i], static_cast(tensor_size)); + v_output_data_addr.push_back(mem_addr); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); mem_addr = static_cast(model_param.mem_base + v_output_offset[i]); @@ -447,22 +468,39 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param return v_workspace_data_addr; } vector v_memory_type; + vector workspace_memory_type; bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); + bool has_mem_type_workspace = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { + if (has_mem_type_workspace && workspace_memory_type[i] == RT_MEMORY_P2P_DDR) { + int64_t p2p_workspace_offset = v_workspace_offset[i]; + int64_t p2p_workspace_bytes = v_workspace_bytes[i]; + uint8_t *p2p_mem_addr = p2p_workspace_bytes == 0 + ? 
nullptr + : model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + p2p_workspace_offset; + v_workspace_data_addr.push_back(p2p_mem_addr); + GELOGI( + "[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] p2p workspace[%zu] offset[%ld] bytes[%ld] " + "memaddr[%p]", + model_param.graph_id, op_desc->GetName().c_str(), i, p2p_workspace_offset, p2p_workspace_bytes, p2p_mem_addr); + continue; + } if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { v_workspace_data_addr.push_back(reinterpret_cast(reinterpret_cast(v_workspace_offset[i]))); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", - model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); } else if (v_workspace_bytes[i] == 0) { v_workspace_data_addr.push_back(nullptr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] Null addr", - model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; v_workspace_data_addr.push_back(mem_addr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], mem_addr); + model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], + mem_addr); } } @@ -478,16 +516,16 @@ Status ModelUtils::GetRtAddress(const RuntimeParam ¶m, uintptr_t logic_addr, uint8_t *runtime_base_addr = nullptr; if ((param.logic_mem_base <= logic_addr) && (logic_addr < param.logic_mem_base + param.mem_size)) { runtime_base_addr 
= param.mem_base - param.logic_mem_base; - GELOGI("The logic addr:0x%lx is data address, base:0x%lx, size:%lu", - logic_addr, param.logic_mem_base, param.mem_size); + GELOGI("The logic addr:0x%lx is data address, base:0x%lx, size:%lu", logic_addr, param.logic_mem_base, + param.mem_size); } else if ((param.logic_weight_base <= logic_addr) && (logic_addr < param.logic_weight_base + param.weight_size)) { runtime_base_addr = param.weight_base - param.logic_weight_base; - GELOGI("The logic addr:0x%lx is weight address, base:0x%lx, size:%lu", - logic_addr, param.logic_weight_base, param.weight_size); + GELOGI("The logic addr:0x%lx is weight address, base:0x%lx, size:%lu", logic_addr, param.logic_weight_base, + param.weight_size); } else if ((param.logic_var_base <= logic_addr) && (logic_addr < param.logic_var_base + param.var_size)) { runtime_base_addr = param.var_base - param.logic_var_base; - GELOGI("The logic addr:0x%lx is variable address, base:0x%lx, size:%lu", - logic_addr, param.logic_var_base, param.var_size); + GELOGI("The logic addr:0x%lx is variable address, base:0x%lx, size:%lu", logic_addr, param.logic_var_base, + param.var_size); } else if (logic_addr != 0) { mem_addr = nullptr; GELOGE(PARAM_INVALID, "The logic addr:0x%lx is abnormal", logic_addr); diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/new_model_manager/model_utils.h index 4b3d7ae7..8474a987 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/new_model_manager/model_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc index b8b02f59..39f0591d 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h index 614544f9..82e228e6 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc index 772078c6..f742118c 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/new_model_manager/task_info/event_record_task_info.h index d3f5961e..04ee1779 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc index b6d8f04c..e8f96b35 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h index a92252d7..f9da30b8 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc index 32c79647..9b1ea04a 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h index b1897533..7f575639 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc index dd4edfd0..7acbb5b3 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h index 880ca487..66248e9f 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index 6679c980..f2a49213 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/new_model_manager/task_info/hccl_task_info.h index f7ce3468..d8456834 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index 49723f17..c28654ea 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,8 +79,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return RT_ERROR_TO_GE_STATUS(rt_ret);) } - GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", - op_desc_->GetName().c_str(), op_desc_->GetType().c_str(), ext_info.size(), ext_info_addr_); + GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str(), ext_info.size(), ext_info_addr_); // 2.1 get loop cond variable for tensor array write uint64_t step_id_addr = 0; @@ -171,6 +171,10 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = input_output_addr_; } + if (davinci_model_->GetOpDugReg()) { + GELOGI("Op debug is open in kernel ex task info"); + dump_args_ = input_output_addr_; + } } uint64_t input_output_addr = static_cast(reinterpret_cast(input_output_addr_)); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h index e4d3e6fd..565e34d7 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 14d38c68..aef40f5d 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -638,6 +638,9 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne dump_args_ = static_cast(args_) + offset; } + GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast(args_) + offset, + "Op debug is open in TVM task info"); + Status ge_ret = UpdateL2Data(kernel_def); // update origin l2 data if (ge_ret != SUCCESS) { @@ -859,8 +862,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k GELOGI("Do InitAicpuTask"); so_name_ = kernel_def.so_name(); kernel_name_ = kernel_def.kernel_name(); - GELOGI("node[%s] test so name %s, kernel name %s", - op_desc_->GetName().c_str(), so_name_.c_str(), kernel_name_.c_str()); + GELOGI("node[%s] test so name %s, kernel name %s", op_desc_->GetName().c_str(), so_name_.c_str(), + kernel_name_.c_str()); OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); if (op_desc == nullptr) { @@ -869,8 +872,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), - "launch cust aicpu so failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); } // copy args to new host memory 
@@ -937,12 +939,15 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); } + if (davinci_model_->GetOpDugReg()) { + GELOGI("Op debug is open in aicpu task info"); + dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); + } if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; } - davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, args_addr.get(), args_, args_size_, - sizeof(aicpu::AicpuParamHead)); + davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, args_addr.get(), args_, args_size_, sizeof(aicpu::AicpuParamHead)); return SUCCESS; } @@ -956,8 +961,7 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); return RT_ERROR_TO_GE_STATUS(rt_ret); } - rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), - ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); return RT_ERROR_TO_GE_STATUS(rt_ret); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index f2945b0b..ab5c4445 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc index 393c0b31..2f6aff36 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h index f83cd1d9..c8a695c9 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc index 5fa96a96..f55c4b02 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/new_model_manager/task_info/label_set_task_info.h index bb02ccf0..c68ffb98 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc index 0c2d63f3..bbbf313f 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -94,8 +94,10 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo label_list_[idx] = label_list[label_id]; } + rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? 
RT_MEMORY_TS_4G : RT_MEMORY_HBM; + GELOGI("memory_type: %u", memory_type); args_size_ = branch_max_ * sizeof(rtLabelDevInfo); - rtError_t rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); + rtError_t rt_ret = rtMalloc(&args_, args_size_, memory_type); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h index 538b2d68..4cb39c95 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc index 40c8974b..a4d7fcc7 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,10 +26,7 @@ const uint32_t kAlignBytes = 64; namespace ge { Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GELOGI("MemcpyAddrAsyncTaskInfo Init Start"); - if (davinci_model == nullptr) { - GELOGE(PARAM_INVALID, "davinci_model is null"); - return PARAM_INVALID; - } + GE_CHECK_NOTNULL(davinci_model); Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); if (ret != SUCCESS) { @@ -43,12 +40,13 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel return INTERNAL_ERROR; } - ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_); + const RuntimeParam &rts_param = davinci_model->GetRuntimeParam(); + ret = ModelUtils::GetRtAddress(rts_param, memcpy_async.src(), src_); if (ret != SUCCESS) { return ret; } - ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); + ret = ModelUtils::GetRtAddress(rts_param, memcpy_async.dst(), dst_); if (ret != SUCCESS) { return ret; } @@ -59,10 +57,7 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel // malloc args memory size_t args_size = sizeof(void *) * io_addrs.size(); - rtMemType_t memory_type = RT_MEMORY_HBM; - if (op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { - memory_type = RT_MEMORY_TS_4G; - } + rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? 
RT_MEMORY_TS_4G : RT_MEMORY_HBM; GELOGI("memory_type: %u", memory_type); rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, memory_type); if (rt_ret != RT_ERROR_NONE) { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h index c7645b9f..90aad9b7 100644 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc index 59831996..3bad3c67 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,27 +22,25 @@ namespace ge { Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GELOGI("MemcpyAsyncTaskInfo Init Start"); - if (davinci_model == nullptr) { - GELOGE(PARAM_INVALID, "davinci_model is null"); - return PARAM_INVALID; - } + GE_CHECK_NOTNULL(davinci_model); + davinci_model_ = davinci_model; - Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); + Status ret = SetStream(task_def.stream_id(), davinci_model_->GetStreamList()); if (ret != SUCCESS) { return ret; } - memcpy_async = task_def.memcpy_async(); - count_ = memcpy_async.count(); - kind_ = memcpy_async.kind(); - dst_max_ = memcpy_async.dst_max(); - OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index()); + memcpy_async_ = task_def.memcpy_async(); + count_ = memcpy_async_.count(); + kind_ = memcpy_async_.kind(); + dst_max_ = memcpy_async_.dst_max(); + OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index()); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index()); + GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index()); return INTERNAL_ERROR; } - if (davinci_model->IsKnownNode()) { + if (davinci_model_->IsKnownNode()) { src_ = reinterpret_cast(davinci_model_->GetCurrentArgsAddr(args_offset_)); dst_ = reinterpret_cast(reinterpret_cast(src_) + sizeof(void *)); // for zero copy @@ -50,29 +48,34 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_); return SUCCESS; } - ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_); + + const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); + ret = ModelUtils::GetRtAddress(rts_param, memcpy_async_.src(), src_); if (ret != SUCCESS) { return ret; } // dst_ needs different address for different chips - if 
(op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { - ret = AllocTsMemoryForMemcpy(op_desc, davinci_model); - if (ret != SUCCESS) { - return ret; + vector memory_type_list; + (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memory_type_list); + if (!memory_type_list.empty() && memory_type_list[0] == RT_MEMORY_TS_4G) { // TS Feature, Just one. + uint64_t mem_offset = memcpy_async_.dst() - rts_param.logic_mem_base; + dst_ = static_cast(rts_param.ts_mem_mall->Acquire(mem_offset, memcpy_async_.dst_max())); + if (dst_ == nullptr) { + return FAILED; } } else { - ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); + ret = ModelUtils::GetRtAddress(rts_param, memcpy_async_.dst(), dst_); if (ret != SUCCESS) { return ret; } } GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu", - memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_); + memcpy_async_.src(), memcpy_async_.dst(), src_, dst_, dst_max_, count_); - davinci_model->DisableZeroCopy(src_); - davinci_model->DisableZeroCopy(dst_); + davinci_model_->DisableZeroCopy(src_); + davinci_model_->DisableZeroCopy(dst_); return SUCCESS; } @@ -102,12 +105,12 @@ Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davinci Status MemcpyAsyncTaskInfo::UpdateArgs() { GELOGI("MemcpyAsyncTaskInfo::UpdateArgs in."); GE_CHECK_NOTNULL(davinci_model_); - Status ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.src(), src_); + Status ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async_.src(), src_); if (ret != SUCCESS) { return ret; } - ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.dst(), dst_); + ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async_.dst(), dst_); if (ret != SUCCESS) { return ret; } @@ -122,33 +125,5 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { return SUCCESS; } -Status 
MemcpyAsyncTaskInfo::AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model) { - int64_t size = 0; - auto tensor_desc = op_desc->GetOutputDescPtr(0); - if ((tensor_desc == nullptr) || (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS)) { - GELOGE(FAILED, "GetTensorSizeInBytes failed!"); - return FAILED; - } - - rtError_t rt_ret = rtMalloc(&memory_4g_, size, RT_MEMORY_TS_4G); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMalloc failed, ret: 0x%X", rt_ret); - return FAILED; - } - - // map save the opdesc's offset and special address, for update the streamSwitchN's input address - std::map memcpy_4g_offset_addr; - vector offsets = op_desc->GetOutputOffset(); - if (offsets.empty()) { - GELOGE(FAILED, "GetOutputOffset failed!"); - return FAILED; - } - memcpy_4g_offset_addr.insert(std::pair(offsets[0], memory_4g_)); - davinci_model->SetMemcpyOffsetAndAddr(memcpy_4g_offset_addr); - - dst_ = reinterpret_cast(memory_4g_); - return SUCCESS; -} - REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ASYNC, MemcpyAsyncTaskInfo); } // namespace ge diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h index 3272b91f..9fe1ce24 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,19 +23,11 @@ namespace ge { class MemcpyAsyncTaskInfo : public TaskInfo { public: - MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(0), memory_4g_(nullptr) {} + MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(RT_MEMCPY_RESERVED) {} ~MemcpyAsyncTaskInfo() override { src_ = nullptr; dst_ = nullptr; - - if (memory_4g_ != nullptr) { - rtError_t ret = rtFree(memory_4g_); - if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); - } - memory_4g_ = nullptr; - } } Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; @@ -47,7 +39,6 @@ class MemcpyAsyncTaskInfo : public TaskInfo { Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; private: - Status AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model); uint8_t *dst_; uint64_t dst_max_; uint8_t *src_; @@ -55,8 +46,7 @@ class MemcpyAsyncTaskInfo : public TaskInfo { uint32_t kind_; DavinciModel *davinci_model_ = nullptr; uint32_t args_offset_ = 0; - domi::MemcpyAsyncDef memcpy_async; - void *memory_4g_; + domi::MemcpyAsyncDef memcpy_async_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc index 533c459a..fd5f4f4c 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h index 8989096d..ab07eb22 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc index 33ebea3b..b9ebfccf 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h index c6b263b4..a75e616e 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc index 616ba85f..b27ceb93 100644 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h index 89642cf8..9e44cbcd 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc index 0c1a1d35..2e389612 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h" #include #include "framework/common/debug/ge_log.h" @@ -147,38 +148,37 @@ Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davin int64_t tensor_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size); - GELOGI("Calculate stream switchn task args , tensor_size %ld, args_offset %ld", tensor_size, args_offset_); + GELOGI("Calculate stream switchn task args, tensor_size %ld, args_offset %ld", tensor_size, args_offset_); return SUCCESS; } Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciModel *davinci_model) { - bool is_4g_mem = false; - const map memcpy_4g_offset_addr = davinci_model->GetMemcpyOffsetAndAddr(); - vector input_offset = op_desc->GetInputOffset(); - if (input_offset.empty()) { - GELOGE(FAILED, "Get StreamSwitchN's input offset failed."); - return FAILED; - } - - auto iter = memcpy_4g_offset_addr.find(input_offset[0]); - if (iter != memcpy_4g_offset_addr.end()) { - input_ptr_ = iter->second; - is_4g_mem = true; - } - - if (is_4g_mem == false) { + // dst_ needs different address for different chips + vector memory_type_list; + (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type_list); + if (!memory_type_list.empty() && memory_type_list[0] == RT_MEMORY_TS_4G) { // TS Feature, Just one. 
+ const vector<int64_t> input_offset = op_desc->GetInputOffset(); + const vector<int64_t> input_length = ModelUtils::GetInputSize(op_desc); + if (input_offset.empty() || input_length.empty()) { + GELOGE(FAILED, "input offset size %zu, input length size: %zu", input_offset.size(), input_length.size()); + return FAILED; + } + const RuntimeParam &rts_param = davinci_model->GetRuntimeParam(); + input_ptr_ = rts_param.ts_mem_mall->Acquire(input_offset[0], input_length[0]); + } else { if (davinci_model->IsKnownNode()) { input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_); } else { auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); if (input_data_addr.empty()) { + GELOGE(FAILED, "input data addr is empty"); return FAILED; } input_ptr_ = input_data_addr[0]; } } - GELOGI("StreamSwitchN's input_ptr is %p, is_4g_mem: %d", input_ptr_, is_4g_mem); + GELOGI("StreamSwitchN's input_ptr is %p", input_ptr_); return SUCCESS; } REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo); diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h index 3d65a086..84e69c8d 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index 09ed7458..23132f45 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h index 9c94d1a9..b7e76af0 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index d237d56c..28ce916e 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h index efd61ef7..829e377b 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/new_model_manager/task_info/task_info.cc index 674d477f..01bf0690 100755 --- a/ge/graph/load/new_model_manager/task_info/task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h index e131a356..4fa0a51b 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/new_model_manager/task_info/task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,10 +22,23 @@
 #include "cce/customize.h"
 #include "cce/taskdown_common.hpp"
 #include "framework/common/ge_inner_error_codes.h"
+#include "graph/load/new_model_manager/ts_mem_mall.h"
 #include "graph/load/new_model_manager/task_info/task_info_factory.h"
 #include "proto/task.pb.h"
+
 namespace ge {
+struct MemInfo {
+  uint64_t memory_size = 0;
+  uint64_t logic_memory_base = 0;
+  uint8_t *memory_base = nullptr;
+};
+
 struct RuntimeParam {
+  RuntimeParam() {
+    ts_mem_mall = std::unique_ptr<TsMemMall>(new (std::nothrow) TsMemMall);
+  }
+  ~RuntimeParam() = default;
+
   uint64_t mem_size = 0;
   uint64_t logic_mem_base = 0;
   uint8_t *mem_base = nullptr;
@@ -35,12 +48,15 @@ struct RuntimeParam {
   uint64_t var_size = 0;
   uint64_t logic_var_base = 0;
   uint8_t *var_base = nullptr;
+  std::map<uint64_t, MemInfo> memory_infos;
   uint32_t batch_num = 0;
   uint32_t stream_num = 0;
   uint32_t event_num = 0;
   uint32_t label_num = 0;
   uint64_t session_id = 0;
   uint32_t graph_id = 0;
+
+  std::unique_ptr<TsMemMall> ts_mem_mall;
 };
 
 typedef struct FusionOpInfo {
diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/new_model_manager/task_info/task_info_factory.h
index 8feef0ac..5b220960 100644
--- a/ge/graph/load/new_model_manager/task_info/task_info_factory.h
+++ b/ge/graph/load/new_model_manager/task_info/task_info_factory.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/new_model_manager/tbe_handle_store.cc index 591e88d0..c47221ad 100755 --- a/ge/graph/load/new_model_manager/tbe_handle_store.cc +++ b/ge/graph/load/new_model_manager/tbe_handle_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "tbe_handle_store.h" #include diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/new_model_manager/tbe_handle_store.h index 6c3ad750..a8f68514 100644 --- a/ge/graph/load/new_model_manager/tbe_handle_store.h +++ b/ge/graph/load/new_model_manager/tbe_handle_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/new_model_manager/ts_mem_mall.h new file mode 100644 index 00000000..01820ce5 --- /dev/null +++ b/ge/graph/load/new_model_manager/ts_mem_mall.h @@ -0,0 +1,102 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_GRAPH_LOAD_TS_MEM_MALL_H_
+#define GE_GRAPH_LOAD_TS_MEM_MALL_H_
+
+#include <cstdint>
+#include <mutex>
+#include <unordered_map>
+
+#include "runtime/base.h"
+#include "framework/common/debug/ge_log.h"
+
+#define TS_MEM_ALIGNMENT 64
+#define TS_MEM_ALIGN_MASK (TS_MEM_ALIGNMENT - 1)
+#define TS_MEM_ALIGN_SIZE(size) (((size) + TS_MEM_ALIGN_MASK) & ~TS_MEM_ALIGN_MASK)
+
+namespace ge {
+constexpr uint32_t kMaxTsMemBlock = 2 * 1024 * 1024;  // Max block 2M.
+
+class TsMemMall {
+ public:
+  TsMemMall() = default;
+  ~TsMemMall() {
+    for (auto it : mem_store_size_) {
+      rtError_t ret = rtFree(it.second);
+      if (ret != RT_ERROR_NONE) {
+        GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret);
+      }
+    }
+    mem_store_size_.clear();
+    mem_store_addr_.clear();
+  }
+
+  void *Acquire(int64_t offset, uint64_t size) {
+    if (size == 0) {
+      GELOGE(RT_FAILED, "Acquire mem block failed, size: %lu", size);
+      return nullptr;
+    }
+
+    uint64_t bytes = TS_MEM_ALIGN_SIZE(size);
+    if (bytes > kMaxTsMemBlock) {
+      GELOGW("Acquire TS memory may not physical continuity, size: %lu", bytes);
+    }
+
+    std::lock_guard<std::mutex> lock(mem_mutex_);
+    const auto it = mem_store_size_.find(offset);
+    if (it != mem_store_size_.end()) {
+      GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", it->second, offset, size, bytes);
+      return it->second;
+    }
+
+    void *addr = nullptr;
+    rtError_t rt_ret = rtMalloc(&addr, bytes, RT_MEMORY_TS_4G);
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
+      return nullptr;
+    }
+
+    GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", addr, offset, size, bytes);
+    mem_store_size_[offset] = addr;
+    mem_store_addr_[addr] = offset;
+    return addr;
+  }
+
+  void Release(void *addr) {
+    std::lock_guard<std::mutex> lock(mem_mutex_);
+    const auto it = mem_store_addr_.find(addr);
+    if (it == mem_store_addr_.end()) {
+      GELOGW("Not TS memory: %p.", addr);
+      return;
+    }
+
+    GELOGI("Release TS memory: %p.", addr);
+    mem_store_size_.erase(it->second);
+    mem_store_addr_.erase(it);
+    rtError_t ret = rtFree(addr);
+    if (ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret);
+    }
+  }
+
+ private:
+  std::mutex mem_mutex_;
+  std::unordered_map<int64_t, void *> mem_store_size_;
+  std::unordered_map<void *, int64_t> mem_store_addr_;
+};
+}  // namespace ge
+#endif  // GE_GRAPH_LOAD_TS_MEM_MALL_H_
diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc
index e93a7250..7ef5b51c 100644
--- a/ge/graph/load/new_model_manager/zero_copy_offset.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h
index c662032b..8749d937 100644
--- a/ge/graph/load/new_model_manager/zero_copy_offset.h
+++ b/ge/graph/load/new_model_manager/zero_copy_offset.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index db68b8fd..90c0fcec 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -45,6 +45,7 @@ #include "graph/manager/util/rt_context_util.h" #include "graph/partition/dynamic_shape_partition.h" #include "graph/passes/enter_pass.h" +#include "graph/partition/stage_partition.h" #include "graph/passes/addn_pass.h" #include "graph/passes/bitcast_pass.h" #include "graph/passes/atomic_addr_clean_pass.h" @@ -132,12 +133,10 @@ bool IsTailingOptimization() { } // namespace namespace ge { -GraphManager::GraphManager(OmgContext &omg_context) +GraphManager::GraphManager() : thread_run_flag_(false), graph_run_listener_(nullptr), - init_flag_(false), - omg_context_(omg_context) { - SetLocalOmgContext(omg_context); + init_flag_(false) { } Status GraphManager::Initialize(const std::map &options) { @@ -166,14 +165,6 @@ Status GraphManager::Initialize(const std::map &options) { return ret; } - graph_builder_.SetOptions(options_); - ret = graph_optimize_.SetOptions(options_); - if (ret != SUCCESS) { - GELOGE(ret, "[Initialize] Graph optimize initialize failed."); - return ret; - } - graph_preparer_.SetOptions(options_); - ret = graph_context_->Initialize(options); if (ret != SUCCESS) { GELOGE(ret, "[Initialize] GraphContext initialize failed."); @@ -269,8 +260,9 @@ Status GraphManager::Finalize() { } Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, - const std::map &options) { - if (graph_map_.find(graph_id) != graph_map_.end()) { + const std::map &options, + const OmgContext &omg_context) { + if (HasGraphNode(graph_id)) { GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id); return GE_GRAPH_GRAPH_ALREADY_EXIST; } @@ -315,19 +307,34 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, graph_node->SetGraph(graph_ptr); graph_node->SetOptions(options); + AddGraphNode(graph_id, 
graph_node); - graph_map_.insert(std::make_pair(graph_id, graph_node)); + AddLocalOmgContext(graph_id, omg_context); + if (!options_.output_datatype.empty()) { + GetLocalOmgContext().output_type = options_.output_datatype; + } - GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); + CompilerStages &stages = GetCompilerStages(graph_id); + stages.preparer.SetOptions(options_); + Status status = stages.optimizer.SetOptions(options_); + if (status != SUCCESS) { + GELOGE(status, "Graph optimizer set options failed."); + return status; + } + stages.builder.SetOptions(options_); var_acc_ctrl_.AddGraph(graph_id, compute_graph); + + GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); return SUCCESS; } -Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph) { +Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph, + GraphId root_graph_id) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + GraphPartitioner &partitioner = GetCompilerStages(root_graph_id).partitioner; if (instance_ptr != nullptr && instance_ptr->InitFlag()) { - Status ret = graph_partitioner_.MergeAfterSubGraphOptimization(compute_graph, original_compute_graph); + Status ret = partitioner.MergeAfterSubGraphOptimization(compute_graph, original_compute_graph); if (ret != SUCCESS) { GELOGE(ret, "merge end and placeholder after subGraph optimization failed."); return FAILED; @@ -339,7 +346,7 @@ Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::Com return ret_topo; } } else { - auto subgraph_list = graph_partitioner_.GetSubGraphMap(); + auto subgraph_list = partitioner.GetSubGraphMap(); if (subgraph_list.find(original_compute_graph) != subgraph_list.end() && !subgraph_list[original_compute_graph].empty() && subgraph_list[original_compute_graph][0] != nullptr) { compute_graph = 
subgraph_list[original_compute_graph][0]->GetSubGraph(); @@ -400,8 +407,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr if (!op_compile_strategy.empty()) { (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } - std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, subgraph, session_id, - GetThreadLocalContext()); + std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, + compute_graph->GetGraphID(), subgraph, session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); return FAILED; @@ -415,7 +422,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr if (!op_compile_strategy.empty()) { (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } - std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, subgraph, session_id, + std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, + compute_graph->GetGraphID(), subgraph, session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -498,9 +506,9 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ return SUCCESS; } -Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph) { +Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { GE_CHECK_NOTNULL(compute_graph); - auto sub_graph_map = graph_partitioner_.GetSubGraphMap(); + auto sub_graph_map = partitioner.GetSubGraphMap(); std::string buffer_optimize; graphStatus graph_status = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); bool need_lx_fusion = (graph_status == GRAPH_SUCCESS) && (buffer_optimize != kOffOptimize); @@ -572,18 +580,20 @@ Status 
GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); - GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", graph_optimize_.OptimizeOriginalGraphForQuantize, compute_graph); - GM_RUN_AND_DUMP_PERF("HandleSummaryOp", graph_optimize_.HandleSummaryOp, compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, + + CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); + GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph); + GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph); + GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, session_id); - GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph); - GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); - GM_RUN_AND_DUMP_PERF("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); + GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", stages.preparer.PrepareRunningFormatRefiner); + GM_RUN_AND_DUMP_PERF("RefineRunningFormat", stages.optimizer.OptimizeOriginalGraphJudgeInsert, compute_graph); GM_RUN_AND_DUMP_PERF("SubexpressionMigration", SubexpressionMigration, compute_graph); - GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); + GE_RUN(GraphManager, stages.preparer.RecordAIPPInfo, compute_graph); if (IsTailingOptimization()) { - GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", stages.preparer.SwitchOpOptimize, compute_graph); 
} GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); @@ -592,7 +602,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); - GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); + GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), "Identify reference failed."); GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); return SUCCESS; } @@ -621,7 +631,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, GE_CHECK_NOTNULL(compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", - graph_optimize_.OptimizeGraphBeforeBuildForRts, compute_graph); + GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, + compute_graph); GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); GELOGI("PreRun:PreRunAfterOptimizeSubGraph success."); return SUCCESS; @@ -872,6 +883,7 @@ Status GraphManager::SaveCacheAfterBuild(uint32_t graph_id, ge::ComputeGraphPtr } if (instance_ptr->IsIncreBuild()) { + std::lock_guard lock(member_mutex_); auto iter = cache_helper_map_.find(graph_id); if (iter == cache_helper_map_.end()) { GELOGW("Can not find ModelCacheHelper of graph[%u]", graph_id); @@ -950,6 +962,9 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vectorSetRunFlag(true); ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); @@ -964,7 +979,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vectorGetGraph()); + UpdateLocalOmgContext(graph_id); + + ret = 
GetCompilerStages(graph_id).preparer.GenerateInfershapeGraph(graph_node->GetGraph()); if (ret != SUCCESS) { GELOGE(ret, "ATC dump infershape json failed"); return ret; @@ -1045,11 +1062,14 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[BuildGraph] graph node is NULL, graphId = %u.", graph_id); return GE_GRAPH_GRAPH_NODE_NULL; } + + UpdateLocalOmgContext(graph_id); + auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, - session_id); + GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node->GetGraph(), inputs, + compute_graph, session_id); for (auto &node : compute_graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); @@ -1107,6 +1127,9 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vectorGetGraphId()); return GE_GRAPH_ALREADY_RUNNING; } + + UpdateLocalOmgContext(graph_id); + graph_node->SetAsync(async); // set graph's run flag graph_node->SetRunFlag(true); @@ -1151,6 +1174,7 @@ Status GraphManager::SaveParams(ge::GeModel &model, const std::string &type, con } void GraphManager::RemoveModelCacheHelper(const GraphId &graph_id) { + std::lock_guard lock(member_mutex_); auto iter = cache_helper_map_.find(graph_id); if (iter != cache_helper_map_.end()) { cache_helper_map_.erase(iter); @@ -1164,18 +1188,20 @@ bool GraphManager::CheckModelLoad(const GeRootModelPtr &ge_root_model, bool load } Status GraphManager::RemoveGraph(const GraphId &graph_id) { - auto it = graph_map_.find(graph_id); - if (it == graph_map_.end()) { + GraphNodePtr graph_node = nullptr; + Status ret = GetGraphNode(graph_id, graph_node); + if (ret != SUCCESS) { GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id); return GE_GRAPH_GRAPH_NOT_EXIST; } - GraphNodePtr 
graph_node = it->second; if ((graph_node == nullptr) || (graph_node->GetRunFlag())) { GELOGE(GE_GRAPH_GRAPH_IS_RUNNING, "[GraphManager] Id %u is running, can't be deleted.", graph_id); return GE_GRAPH_GRAPH_IS_RUNNING; } - Status ret = SUCCESS; + + std::lock_guard lock(unload_model_mutex_); + Status middle_ret; rtError_t rt_ret; const std::vector &all_sub_graph = graph_node->GetAllSubGraph(); @@ -1211,7 +1237,7 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { } } var_acc_ctrl_.RemoveGraph(graph_id); - graph_map_.erase(it); + RemoveGraphNode(graph_id); RemoveModelCacheHelper(graph_id); @@ -1237,6 +1263,9 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { ret = FAILED; } } + + RemoveCompilerStages(graph_id); + GE_CHK_STATUS_RET(ret, "[GraphManager:] Remove graph failed, graph_id=%u.", graph_id); GELOGI("[GraphManager] remove graph success, graph_id=%u.", graph_id); return SUCCESS; @@ -1360,9 +1389,6 @@ Status GraphManager::ParseOptions(const std::map &opti // net output node dataType ParseOption(options, OUTPUT_DATATYPE, options_.output_datatype); - if (!options_.output_datatype.empty()) { - omg_context_.output_type = options_.output_datatype; - } // Set save_original_model flag (ge.save_original_model) ParseOption(options, SAVE_ORIGINAL_MODEL, options_.save_original_model); @@ -1558,7 +1584,24 @@ Status GraphManager::ParseParallelNum(const std::string ¶llel_num, const std return SUCCESS; } + +void GraphManager::AddGraphNode(GraphId graph_id, const GraphNodePtr &graph_node) { + std::lock_guard lock(member_mutex_); + graph_map_.emplace(graph_id, graph_node); +} + +void GraphManager::RemoveGraphNode(GraphId graph_id) { + std::lock_guard lock(member_mutex_); + graph_map_.erase(graph_id); +} + +bool GraphManager::HasGraphNode(GraphId graph_id) { + std::lock_guard lock(member_mutex_); + return graph_map_.find(graph_id) != graph_map_.end(); +} + Status GraphManager::GetGraphNode(const GraphId &graph_id, GraphNodePtr &out) { + std::lock_guard 
lock(member_mutex_); auto iter = graph_map_.find(graph_id); if (iter == graph_map_.end()) { out = nullptr; @@ -1580,7 +1623,7 @@ Status GraphManager::SummaryHandle(const GraphId &graph_id, std::vector summary_output_index; GELOGI("[GraphManager] SummaryHandle, outputsSize=%zu.", outputs.size()); const std::map> &whole_summary_output_indexes = - graph_optimize_.GetSummaryOutputIndexes(); + GetCompilerStages(graph_id).optimizer.GetSummaryOutputIndexes(); if (whole_summary_output_indexes.find(graph_id) == whole_summary_output_indexes.end()) { GELOGE(FAILED, "No Summary graph found in map."); return FAILED; @@ -1676,6 +1719,7 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap Status GraphManager::RegisterCallBackFunc( const std::string &key, const std::function &)> &callback) { + std::lock_guard lock(member_mutex_); GELOGI("[GraphManager] RegisterCallBackFunc, key=%s.", key.c_str()); me_callback_map_[key] = callback; return SUCCESS; @@ -1683,6 +1727,7 @@ Status GraphManager::RegisterCallBackFunc( Status GraphManager::PushSummaryData2ME(const GraphId &graph_id, const std::map &summary_data) { + std::lock_guard lock(member_mutex_); GELOGI("[GraphManager] PushSummaryData2ME, dataSize=%zu.", summary_data.size()); auto itr = me_callback_map_.find(kSummary); if (itr == me_callback_map_.end()) { @@ -1693,6 +1738,7 @@ Status GraphManager::PushSummaryData2ME(const GraphId &graph_id, } Status GraphManager::PushSaveData2ME(const GraphId &graph_id, const std::map &save_data) { + std::lock_guard lock(member_mutex_); GELOGI("[GraphManager] PushSaveData2ME, dataSize=%zu.", save_data.size()); auto itr = me_callback_map_.find(kSave); if (itr == me_callback_map_.end()) { @@ -2137,7 +2183,7 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { } // After while sub graph handle, mark all node rw type - auto result = graph_optimize_.HandleMemoryRWConflict(compute_graph); + auto result = 
GetCompilerStages(compute_graph->GetGraphID()).optimizer.HandleMemoryRWConflict(compute_graph); if (result != SUCCESS) { GELOGW( "Mark node rw type failed. It will take some effect on memory_assign_conflicts handling." @@ -2228,8 +2274,16 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra if (free_memory >= (memory_size + weight_size)) { return SUCCESS; } - rtError_t rt_ret; - for (auto &it : graph_map_) { + + std::lock_guard lock(unload_model_mutex_); + + std::map graph_map; + { + std::lock_guard lock(member_mutex_); + graph_map = graph_map_; + } + + for (auto &it : graph_map) { auto graph_id = it.second->GetGraphId(); auto model = it.second->GetGeRootModel(); if (model == nullptr) { @@ -2248,7 +2302,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra } GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, max_memory_size); - rt_ret = rtSetDevice(GetContext().DeviceId()); + rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id); continue; @@ -2270,16 +2324,18 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra it.second->SetLoadFlag(false); GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success and set LoadFlag to false.", graph_id, model_id); } + return SUCCESS; } -Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, +Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, const SubGraphInfoPtr &sub_graph_info_ptr, uint64_t session_id, const GEThreadLocalContext &ge_context) { - Status ret = SUCCESS; - GetThreadLocalContext() = ge_context; if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { - SetLocalOmgContext(graph_manager->omg_context_); + 
GetContext().SetSessionId(session_id); + GetThreadLocalContext() = ge_context; + graph_manager->UpdateLocalOmgContext(root_graph_id); + ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); GELOGI("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", @@ -2288,7 +2344,8 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); GE_CHECK_NOTNULL(compute_graph_tmp); compute_graph_tmp->SetSessionID(session_id); - ret = graph_manager->graph_optimize_.OptimizeSubGraph(compute_graph_tmp, engine_name); + Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, + engine_name); if (ret != SUCCESS) { GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); return ret; @@ -2301,9 +2358,10 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager compute_graph_tmp != nullptr ? 
compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), pthread_self()); } else { - GELOGE(ret, "graph_manager or sub_graph_info_ptr is nullptr"); + GELOGE(FAILED, "graph_manager or sub_graph_info_ptr is nullptr"); return FAILED; } + return SUCCESS; } @@ -2326,6 +2384,7 @@ void GraphManager::AddModelCacheHelperToMap(const GraphId &graph_id, uint64_t se ComputeGraphPtr &compute_graph) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr != nullptr && instance_ptr->IsIncreBuild()) { + std::lock_guard lock(member_mutex_); auto iter = cache_helper_map_.find(graph_id); if (iter == cache_helper_map_.end()) { ModelCacheHelperPtr cache_helper = MakeShared(session_id, graph_id, compute_graph); @@ -2338,18 +2397,27 @@ void GraphManager::AddModelCacheHelperToMap(const GraphId &graph_id, uint64_t se } } +ModelCacheHelperPtr GraphManager::FindModelCacheHelper(GraphId graph_id) { + std::lock_guard lock(member_mutex_); + auto iter = cache_helper_map_.find(graph_id); + if (iter != cache_helper_map_.end()) { + return iter->second; + } + + return nullptr; +} + Status GraphManager::IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_model) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->IsIncreBuild()) { return FAILED; } const uint32_t graph_id = graph_node->GetGraphId(); - auto iter = cache_helper_map_.find(graph_id); - if (iter == cache_helper_map_.end()) { + ModelCacheHelperPtr cache_helper = FindModelCacheHelper(graph_id); + if (cache_helper == nullptr) { GELOGW("Can not find ModelCacheHelper of graph[%u]", graph_id); return FAILED; } - ModelCacheHelperPtr cache_helper = iter->second; if (cache_helper->IsModelCacheHit()) { GEEVENT("Model cache hit."); Status ret = LoadFromCache(graph_node, cache_helper, ge_model); @@ -2384,7 +2452,6 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { GELOGW("Set thread name failed."); } - 
SetLocalOmgContext(graph_manager->omg_context_); PreRunArgs args; while (graph_manager->thread_run_flag_) { @@ -2392,8 +2459,13 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { if (!pop_status) { continue; } - GetThreadLocalContext() = args.context; + GELOGI("A new loop start."); + + GetContext().SetSessionId(args.session_id); + GetThreadLocalContext() = args.context; + graph_manager->UpdateLocalOmgContext(args.graph_id); + std::vector ge_inputs; ConstructGeInput(ge_inputs, args); @@ -2414,6 +2486,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { graph_node->Unlock(); return; } + // set graph's run flag graph_node->SetRunFlag(true); @@ -2430,7 +2503,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { std::vector ge_models; if (graph_manager->options_.local_fmk_op_flag) { - graph_manager->graph_optimize_.TranFrameOp(compute_graph_tmp); + graph_manager->GetCompilerStages(graph_node->GetGraphId()).optimizer.TranFrameOp(compute_graph_tmp); } // it will not execute graph preprocess, optimize, parition, build if the graph has built successful. 
@@ -2473,8 +2546,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { ge_root_model = graph_node->GetGeRootModel(); } - graph_manager->run_args_q_.Push( - RunArgs({graph_node, args.graph_id, args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback})); + graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor, + ge_root_model, GetThreadLocalContext(), args.callback })); GELOGI("Loop end."); } } @@ -2483,7 +2556,6 @@ void GraphManager::RunThread(GraphManager *graph_manager) { if (prctl(PR_SET_NAME, ("GE_Run")) != 0) { GELOGW("Set thread name failed."); } - SetLocalOmgContext(graph_manager->omg_context_); RunArgs args; while (graph_manager->thread_run_flag_) { @@ -2491,8 +2563,13 @@ void GraphManager::RunThread(GraphManager *graph_manager) { if (!pop_status) { continue; } + GELOGI("A new loop start."); + + GetContext().SetSessionId(args.session_id); GetThreadLocalContext() = args.context; + graph_manager->UpdateLocalOmgContext(args.graph_id); + if (args.graph_node->graph_run_async_listener_ != nullptr) { args.graph_node->graph_run_async_listener_->SetCallback(args.callback); } @@ -2648,10 +2725,19 @@ void GraphManager::SetOptionsRunGraphFlag(bool run_graph_flag) { options_.run_gr Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, uint64_t session_id) { // graph partition + // Stage partition, only for root graph + GE_TIMESTAMP_START(StagePartition); + StagePartitioner stage_partitioner(compute_graph); + auto ret = stage_partitioner.Partition(); + if (ret != SUCCESS) { + GELOGE(ret, "Graph partition by stage Failed"); + return ret; + } + GE_TIMESTAMP_EVENT_END(StagePartition, "OptimizeSubgraph::StagePartition"); // all sub graph list of root graph and sub graph GE_TIMESTAMP_START(GraphPartitionDynamicShape); DynamicShapePartitioner dynamic_shape_partitioner(compute_graph); - auto ret = dynamic_shape_partitioner.Partition(); + ret = 
dynamic_shape_partitioner.Partition(); if (ret != SUCCESS) { GELOGE(ret, "Graph partition by dynamic shape Failed"); return ret; @@ -2663,14 +2749,15 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra } GE_TIMESTAMP_EVENT_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape"); GE_TIMESTAMP_START(GraphPartition); - ret = graph_partitioner_.Partition(compute_graph, GraphPartitioner::kPartitioning); + GraphPartitioner &partitioner = GetCompilerStages(graph_node->GetGraphId()).partitioner; + ret = partitioner.Partition(compute_graph, GraphPartitioner::kPartitioning); if (ret != SUCCESS) { GELOGE(ret, "Graph partition Failed"); return ret; } GE_TIMESTAMP_EVENT_END(GraphPartition, "OptimizeSubgraph::Partition1"); GE_TIMESTAMP_START(SetSubgraph); - ret = SetSubgraph(session_id, compute_graph); + ret = SetSubgraph(session_id, compute_graph, partitioner); if (ret != SUCCESS) { GELOGE(ret, "Graph set subgraph Failed"); return ret; @@ -2682,7 +2769,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra GE_TIMESTAMP_START(ConvertGraphToFile); std::string tuning_path; (void) GetContext().GetOption(TUNING_PATH, tuning_path); - Status ret = ConvertGraphToFile(compute_graph, tuning_path, (options_.build_step == BUILD_STEP_AFTER_BUILDER)); + Status ret = ConvertGraphToFile(compute_graph, partitioner, tuning_path, + (options_.build_step == BUILD_STEP_AFTER_BUILDER)); if (ret != SUCCESS) { GELOGE(ret, "Convert graph[%s] to file failed", compute_graph->GetName().c_str()); return ret; @@ -2695,7 +2783,7 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra std::vector merged_sub_graph_list; GE_TIMESTAMP_START(MergeSubgraph); - ret = MergeSubGraph(merged_compute_graph, compute_graph); + ret = MergeSubGraph(merged_compute_graph, compute_graph, graph_node->GetGraphId()); if (ret != SUCCESS) { GELOGE(ret, "Merge SubGraph Failed"); return ret; @@ -2718,16 +2806,17 @@ Status 
GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra return SUCCESS; } -Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, std::string path, bool exe_flag) { +Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPartitioner &partitioner, std::string path, + bool exe_flag) { GE_CHECK_NOTNULL(compute_graph); GELOGI("compute_graph [%s] path [%s] Enter ConvertGraphToFile.", compute_graph->GetName().c_str(), path.c_str()); std::vector non_tuning_subgraphs; - auto input_node_sub_graph_map = graph_partitioner_.graph_2_input_subgraph_; + auto input_node_sub_graph_map = partitioner.graph_2_input_subgraph_; const auto &input_subgraph_info = input_node_sub_graph_map[compute_graph]; GE_CHECK_NOTNULL(input_subgraph_info); ComputeGraphPtr input_graph_tmp = input_subgraph_info->GetSubGraph(); non_tuning_subgraphs.push_back(input_graph_tmp); - auto sub_graph_map = graph_partitioner_.GetSubGraphMap(); + auto sub_graph_map = partitioner.GetSubGraphMap(); const auto &subgraph_infos = sub_graph_map[compute_graph]; std::vector tuning_subgraphs; for (const auto &sub_graph_info_ptr: subgraph_infos) { @@ -2753,7 +2842,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp compute_graph->SetName(graph_name); } std::vector sub_graph_list; - auto ret = graph_builder_.Build(compute_graph, sub_graph_list, ge_root_model, session_id); + auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model, + session_id); if (ret != SUCCESS) { GELOGE(ret, "SubGraph build Failed."); return ret; @@ -2872,4 +2962,30 @@ Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector< } return SUCCESS; } + +void GraphManager::AddLocalOmgContext(GraphId graph_id, const OmgContext &omg_context) { + std::lock_guard lock(member_mutex_); + omg_contexts_.emplace(graph_id, omg_context); + SetLocalOmgContext(omg_contexts_[graph_id]); +} + +void 
GraphManager::UpdateLocalOmgContext(GraphId graph_id) { + std::lock_guard lock(member_mutex_); + auto iter = omg_contexts_.find(graph_id); + if (iter != omg_contexts_.end()) { + SetLocalOmgContext(iter->second); + } else { + GELOGW("OmgContext of graph %u not found.", graph_id); + } +} + +GraphManager::CompilerStages &GraphManager::GetCompilerStages(GraphId graph_id) { + std::lock_guard lock(member_mutex_); + return compiler_stages_[graph_id]; +} + +void GraphManager::RemoveCompilerStages(GraphId graph_id) { + std::lock_guard lock(member_mutex_); + compiler_stages_.erase(graph_id); +} } // namespace ge diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 45c91406..fc3601af 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -45,8 +45,7 @@ namespace ge { class GraphManager { public: - GraphManager(OmgContext &omg_context); - + GraphManager(); ~GraphManager() = default; /// @@ -71,7 +70,8 @@ class GraphManager { /// @param [out] Graph output graph /// @return Status result of function /// - Status AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options); + Status AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options, + const OmgContext &omg_context); /// /// @ingroup ge_graph @@ -170,6 +170,13 @@ class GraphManager { Status SaveCheckPointResult(const Graph &graph, const std::vector &outputs, map &var_results); private: + struct CompilerStages { + GraphPrepare preparer; + GraphOptimize optimizer; + GraphPartitioner partitioner; + GraphBuilder builder; + }; + struct PreRunArgs { GraphId graph_id; std::vector input_tensor; @@ -181,18 +188,23 @@ class GraphManager { struct RunArgs { GraphNodePtr graph_node; GraphId graph_id; + uint64_t session_id; std::vector input_tensor; GeRootModelPtr ge_root_model; GEThreadLocalContext context; RunAsyncCallback callback; }; + void AddGraphNode(GraphId graph_id, const GraphNodePtr &graph_node); + void 
RemoveGraphNode(GraphId graph_id); + bool HasGraphNode(GraphId graph_id); Status GetGraphNode(const GraphId &graph_id, GraphNodePtr &out); std::shared_ptr GetModelListener() const { return graph_run_listener_; } - static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, const SubGraphInfoPtr &sub_graph_info_ptr, - uint64_t session_id, const GEThreadLocalContext &ge_context); + static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, + const SubGraphInfoPtr &sub_graph_info_ptr, uint64_t session_id, + const GEThreadLocalContext &ge_context); Status PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); @@ -249,11 +261,13 @@ class GraphManager { bool CheckTransOpForCheckpointGraph(NodePtr &node); - Status MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph); + Status MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph, + GraphId root_graph_id); - Status ConvertGraphToFile(ComputeGraphPtr &compute_graph, std::string file_path, bool exe_flag = false); + Status ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPartitioner &partitioner, std::string file_path, + bool exe_flag = false); - Status SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph); + Status SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner); void SetAttrForHcomBroadCastOp(ge::ComputeGraphPtr &compute_graph); @@ -298,6 +312,7 @@ class GraphManager { void AddModelCacheHelperToMap(const GraphId &graph_id, uint64_t session_id, ComputeGraphPtr &compute_graph); Status IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_model); void RemoveModelCacheHelper(const GraphId &graph_id); + ModelCacheHelperPtr FindModelCacheHelper(GraphId graph_id); static void ConstructGeInput(std::vector &ge_inputs, PreRunArgs &args); static 
void PreRunThread(GraphManager *graph_manager); @@ -334,6 +349,12 @@ class GraphManager { std::unordered_map ©_graphs); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id); + void AddLocalOmgContext(GraphId graph_id, const OmgContext &omg_context); + void UpdateLocalOmgContext(GraphId graph_id); + + CompilerStages &GetCompilerStages(GraphId graph_id); + void RemoveCompilerStages(GraphId graph_id); + std::atomic_bool thread_run_flag_; BlockingQueue prerun_args_q_{}; BlockingQueue run_args_q_{}; @@ -341,7 +362,6 @@ class GraphManager { std::thread run_thread_; std::map graph_map_; - std::map cache_helper_map_; // for run graph synchronous return @@ -356,19 +376,18 @@ class GraphManager { bool init_flag_; GraphManagerOptions options_; - OmgContext &omg_context_; + GraphContextPtr graph_context_ = nullptr; + map omg_contexts_; - GraphPrepare graph_preparer_; - GraphOptimize graph_optimize_; - GraphPartitioner graph_partitioner_; - GraphBuilder graph_builder_; - GraphLoader graph_loader_; + map compiler_stages_; GraphExecutor graph_executor_; - GraphContextPtr graph_context_ = nullptr; VarAccelerateCtrl var_acc_ctrl_; std::mutex run_mutex_; + + std::mutex member_mutex_; + std::mutex unload_model_mutex_; }; } // namespace ge diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index 43bc8e17..d4aceddd 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -23,7 +23,7 @@ #include "runtime/mem.h" namespace { -const uint32_t kMallocHostMemFlag = 1; +const uint32_t kMallocHostMemFlag = 0; } // namespace namespace ge { Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { @@ -101,6 +101,7 @@ Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) { } Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size) { + std::lock_guard lock(mutex_); if (var_memory_base_map_.find(op_name) == 
var_memory_base_map_.end()) { GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str()); return INTERNAL_ERROR; diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 116a4b86..355b6ae4 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,6 +37,43 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t uint64_t device_base = 0; uint64_t device_size = 0; GE_CHK_STATUS_RET(MemManager::Instance().RdmaPoolInstance(mem_type).GetBaseAddr(device_base, device_size)); + auto table_len = var_info.size() + 1; + std::unique_ptr reg_addrs(new (std::nothrow) MemRegisterAddr[table_len]); + GE_CHECK_NOTNULL(reg_addrs); + for (size_t i = 0; i < var_info.size(); ++i) { + reg_addrs[i] = {var_info[i].base_addr, var_info[i].var_size}; + } + reg_addrs[table_len - 1] = {device_base, device_size}; + + std::string file_name = "libhccl.so"; + std::string path = PluginManager::GetPath(); + path.append(file_name); + string canonical_path = RealPath(path.c_str()); + if (canonical_path.empty()) { + GELOGE(FAILED, "Failed to get realpath of %s", path.c_str()); + return FAILED; + } + GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonical_path.c_str()); + auto handle = dlopen(canonical_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + GE_CHECK_NOTNULL(handle); + GE_MAKE_GUARD(not_used_var, [&] { + if (dlclose(handle) != 0) { + GELOGW("Failed to close handle %s", dlerror()); + } + }); + + auto hcom_remote_mem_register = + (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register"); + if (hcom_remote_mem_register == nullptr) { + GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); + return 
FAILED; + } + + HcclResult hccl_ret = hcom_remote_mem_register(reg_addrs.get(), table_len); + if (hccl_ret != HCCL_SUCCESS) { + GELOGE(HCCL_E_INTERNAL, "Rdma mem register failed, ret: 0x%X", hccl_ret); + return HCCL_E_INTERNAL; + } return SUCCESS; } diff --git a/ge/graph/manager/util/hcom_util.h b/ge/graph/manager/util/hcom_util.h index 448ed611..f80ced35 100644 --- a/ge/graph/manager/util/hcom_util.h +++ b/ge/graph/manager/util/hcom_util.h @@ -39,6 +39,8 @@ static std::map kConstOpHcclDataType = { {ge::DT_FLOAT16, HCCL_DATA_TYPE_FP16}, {ge::DT_INT8, HCCL_DATA_TYPE_INT8}, {ge::DT_INT32, HCCL_DATA_TYPE_INT32}, + {ge::DT_INT64, HCCL_DATA_TYPE_INT64}, + {ge::DT_UINT64, HCCL_DATA_TYPE_UINT64}, }; static std::map kConstOpHcclDataTypeSize = { @@ -46,6 +48,8 @@ static std::map kConstOpHcclDataTypeSize = { {HCCL_DATA_TYPE_FP16, sizeof(float) / 2}, {HCCL_DATA_TYPE_INT8, sizeof(int8_t)}, {HCCL_DATA_TYPE_INT32, sizeof(int32_t)}, + {HCCL_DATA_TYPE_INT64, sizeof(int64_t)}, + {HCCL_DATA_TYPE_UINT64, sizeof(uint64_t)}, }; static std::map kHorovodRedOpToHcclRedOp = { diff --git a/ge/graph/manager/util/variable_accelerate_ctrl.cc b/ge/graph/manager/util/variable_accelerate_ctrl.cc index 522b5ee3..22f9169c 100644 --- a/ge/graph/manager/util/variable_accelerate_ctrl.cc +++ b/ge/graph/manager/util/variable_accelerate_ctrl.cc @@ -28,6 +28,7 @@ inline bool IsVariable(const std::string &node_type) { } bool VarAccelerateCtrl::IsVarPermitToChangeFormats(const std::string &var_name) { + std::lock_guard lock(mutex_); auto iter = var_names_to_change_times_.find(var_name); if (iter == var_names_to_change_times_.end()) { return true; @@ -36,6 +37,7 @@ bool VarAccelerateCtrl::IsVarPermitToChangeFormats(const std::string &var_name) } void VarAccelerateCtrl::SetVarChanged(const std::string &var_name) { + std::lock_guard lock(mutex_); auto times = ++var_names_to_change_times_[var_name]; for (auto &graph_id_to_var_names : graph_ids_to_var_names_) { if (graph_id_to_var_names.second.count(var_name) > 
0) { @@ -50,6 +52,7 @@ void VarAccelerateCtrl::SetVarChanged(const std::string &var_name) { } void VarAccelerateCtrl::AddGraph(uint32_t graph_id, const ComputeGraphPtr &compute_graph) { + std::lock_guard lock(mutex_); if (compute_graph == nullptr) { GELOGE(PARAM_INVALID, "Failed to add graph %u, the compute graph is null", graph_id); return; @@ -66,14 +69,19 @@ void VarAccelerateCtrl::AddGraph(uint32_t graph_id, const ComputeGraphPtr &compu } void VarAccelerateCtrl::RemoveGraph(uint32_t graph_id) { + std::lock_guard lock(mutex_); GELOGD("Remove graph %u", graph_id); graph_ids_to_var_names_.erase(graph_id); graph_ids_need_rebuild_.erase(graph_id); } + bool VarAccelerateCtrl::IsGraphNeedRebuild(uint32_t graph_id) const { + std::lock_guard lock(mutex_); return graph_ids_need_rebuild_.count(graph_id) > 0; } + void VarAccelerateCtrl::SetGraphBuildEnd(uint32_t graph_id) { + std::lock_guard lock(mutex_); graph_ids_need_rebuild_.erase(graph_id); GELOGD("The graph %u has built end, remove it from the rebuild-set", graph_id); } diff --git a/ge/graph/manager/util/variable_accelerate_ctrl.h b/ge/graph/manager/util/variable_accelerate_ctrl.h index d8504c02..a7ff04c2 100644 --- a/ge/graph/manager/util/variable_accelerate_ctrl.h +++ b/ge/graph/manager/util/variable_accelerate_ctrl.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "graph/compute_graph.h" #include "graph/node.h" @@ -59,6 +60,8 @@ class VarAccelerateCtrl { /// std::map var_names_to_change_times_; static const int kMaxVarChangeTimes_ = 1; + + mutable std::mutex mutex_; }; } // namespace ge diff --git a/ge/graph/optimize/common/params.h b/ge/graph/optimize/common/params.h index c174a4d1..ee2a735b 100644 --- a/ge/graph/optimize/common/params.h +++ b/ge/graph/optimize/common/params.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except 
in compliance with the License. diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index 53695fba..dc107874 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,9 +33,7 @@ GraphOptimize::GraphOptimize() : optimize_type_(domi::FrameworkType::TENSORFLOW), cal_config_(""), insert_op_config_(""), - parse_out_node_(""), - core_type_(""), - graph_context_(nullptr) {} + core_type_("") {} void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { if (compute_graph == nullptr) { diff --git a/ge/graph/optimize/graph_optimize.h b/ge/graph/optimize/graph_optimize.h index 19bf1b4a..b4a19c3f 100755 --- a/ge/graph/optimize/graph_optimize.h +++ b/ge/graph/optimize/graph_optimize.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -60,7 +60,7 @@ class GraphOptimize { const std::map> &GetSummaryOutputIndexes() const { return summary_output_indexes_; - } // lint !e1073 + } // handle summary node before preRun graph Status HandleSummaryOp(ComputeGraphPtr &compute_graph); @@ -79,12 +79,8 @@ class GraphOptimize { domi::FrameworkType optimize_type_; std::string cal_config_; std::string insert_op_config_; - std::string parse_out_node_; std::string core_type_; - std::vector out_nodes_name_; - std::vector out_nodes_index_; bool train_graph_flag_ = false; - GraphContextPtr graph_context_; bool local_fmk_op_flag_ = false; // record the summary names for filter sumarry result. 
std::map> summary_output_indexes_ = {}; diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 226ebbed..bc005932 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -1,11 +1,12 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -567,7 +568,7 @@ Status SplitIdentity(const NodePtr &node) { Status InsertIdentityAsNeeded(const NodePtr &node) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - if (node->GetOutDataNodesSize() == 0 || node->GetInDataNodes().empty()) { + if (node->GetOutDataNodesSize() == 0) { return SUCCESS; } for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { diff --git a/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc b/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc index 34c3a0de..be025730 100644 --- a/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc +++ b/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/optimize/optimizer/allreduce_fusion_pass.h b/ge/graph/optimize/optimizer/allreduce_fusion_pass.h index 8b2168d9..63119520 100644 --- a/ge/graph/optimize/optimizer/allreduce_fusion_pass.h +++ b/ge/graph/optimize/optimizer/allreduce_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/optimize/summary_optimize.cc b/ge/graph/optimize/summary_optimize.cc index 077ab1b0..eae13401 100644 --- a/ge/graph/optimize/summary_optimize.cc +++ b/ge/graph/optimize/summary_optimize.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index b0477ae8..f2e5ba24 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/partition/engine_place.cc b/ge/graph/partition/engine_place.cc index cdf29e56..749cfa9f 100755 --- a/ge/graph/partition/engine_place.cc +++ b/ge/graph/partition/engine_place.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/partition/engine_place.h b/ge/graph/partition/engine_place.h index 5dc3e6a0..1672df0d 100755 --- a/ge/graph/partition/engine_place.h +++ b/ge/graph/partition/engine_place.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index 07ab4198..c8e8cda3 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -183,9 +183,7 @@ Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr GELOGE(FAILED, "Find graph info failed, graph name is %s", original_graph->GetName().c_str()); return FAILED;) auto graph_info = graph_2_graph_partition_info_[original_graph]; - GE_IF_BOOL_EXEC( - graph_info.corresponding_node_in_partitions_.find(parent_node) == - graph_info.corresponding_node_in_partitions_.end(), + GE_IF_BOOL_EXEC(graph_info.corresponding_node_in_partitions_.count(parent_node) == 0, GELOGE(FAILED, "Find corresponding node failed, parent node name is %s", parent_node->GetName().c_str()); return FAILED;) auto corresponding_node = graph_info.corresponding_node_in_partitions_[parent_node]; @@ -201,8 +199,7 @@ Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr if (real_ret != SUCCESS) { auto root_graph = ge::GraphUtils::FindRootGraph(original_compute_graph); GE_CHECK_NOTNULL(root_graph); - (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), - root_graph->GetGraphID()); + (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), root_graph->GetGraphID()); } return real_ret; } diff --git a/ge/graph/partition/graph_partition.h b/ge/graph/partition/graph_partition.h index 703a1570..c4425355 100644 --- 
a/ge/graph/partition/graph_partition.h +++ b/ge/graph/partition/graph_partition.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/partition/stage_partition.cc b/ge/graph/partition/stage_partition.cc new file mode 100644 index 00000000..5df15bba --- /dev/null +++ b/ge/graph/partition/stage_partition.cc @@ -0,0 +1,377 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/partition/stage_partition.h" + +#include +#include "framework/common/debug/ge_log.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "common/util.h" +#include "common/types.h" + +namespace ge { +Status StagePartitioner::Partition() { + GE_CHECK_NOTNULL(root_graph_); + if (root_graph_->GetParentGraph() != nullptr) { + return SUCCESS; + } + + for (const auto &node : root_graph_->GetDirectNode()) { + auto op_desc = node->GetOpDesc(); + uint32_t level = 0; + if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) { + continue; + } + stage_nodes_[level].insert(node); + } + if (stage_nodes_.empty()) { + GELOGI("Graph %s does not set stage_level, it is not_changed.", root_graph_->GetName().c_str()); + return SUCCESS; + } + + if (SplitStageLevel() != SUCCESS) { + GELOGE(FAILED, "Split graph-stage for graph %s failed.", root_graph_->GetName().c_str()); + return FAILED; + } + + if (StagePartition() != SUCCESS) { + GELOGE(FAILED, "Stage partition for graph %s failed.", root_graph_->GetName().c_str()); + return FAILED; + } + + if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) { + GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, " + "maybe stage_level was not set correctly.", root_graph_->GetName().c_str()); + return FAILED; + } + return SUCCESS; +} + +Status StagePartitioner::SplitStageLevel() { + std::stack nodes; + std::unordered_set visited_stage_nodes; + for (auto &stage : stage_nodes_) { + uint32_t cur_stage_level = stage.first; + const auto &cur_stage_nodes = stage.second; + for (const auto &marked_node : cur_stage_nodes) { + nodes.push(marked_node); + } + visited_stage_nodes.clear(); + while (!nodes.empty()) { + auto node = nodes.top(); + nodes.pop(); + GE_CHECK_NOTNULL(node->GetOpDesc()); + if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) { + continue; + } + for (const auto &in_node : 
node->GetInAllNodes()) { + if (visited_stage_nodes.count(in_node) != 0) { + continue; + } + nodes.push(in_node); + } + if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { + GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str()); + return INTERNAL_ERROR; + } + GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level); + visited_stage_nodes.emplace(node); + } + for (const auto &node : visited_stage_nodes) { + stage.second.insert(node); + } + } + + return SUCCESS; +} + +Status StagePartitioner::StagePartition() { + for (const auto &stage : stage_nodes_) { + StageInfo stage_info(stage.first); + FindStageIO(stage.second, stage_info); + + std::string subgraph_name = "Subgraph_Level_" + std::to_string(stage.first); + NodePtr graph_node = BuildSubgraphNode(subgraph_name, stage_info); + if (graph_node == nullptr) { + GELOGE(FAILED, "Build PartitionedCall node for stage %u failed.", stage.first); + return FAILED; + } + + ComputeGraphPtr subgraph = BuildStageGraph(graph_node, stage_info); + if (subgraph == nullptr) { + GELOGE(FAILED, "Build subgraph for stage %u failed.", stage.first); + return FAILED; + } + if (root_graph_->AddSubgraph(subgraph) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Add subgraph of stage %u failed.", stage.first); + return FAILED; + } + + if ((RelinkDataEdges(graph_node, stage_info) != SUCCESS) || + (RelinkCtrlEdges(graph_node, stage_info) != SUCCESS)) { + GELOGE(FAILED, "Relink edges for stage %u failed.", stage.first); + return FAILED; + } + + for (const auto &stage_node : stage.second) { + if (GraphUtils::RemoveNodeWithoutRelink(root_graph_, stage_node) != GRAPH_SUCCESS) { + GELOGW("Remove node %s failed.", stage_node->GetName().c_str()); + } + } + } + + return SUCCESS; +} + +void StagePartitioner::FindStageIO(const std::unordered_set &stage_nodes, StageInfo &stage_info) { + for (const auto &node : stage_nodes) { + // stage nodes + 
stage_info.stage_nodes.emplace(node); + // in data nodes + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + if (peer_out_anchor == nullptr) { + continue; + } + if (stage_nodes.count(peer_out_anchor->GetOwnerNode()) == 0) { + stage_info.data_inputs.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); + } else { + stage_info.inner_data_edges.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); + } + } + // out data nodes + std::list peer_data_anchors; + for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { + peer_data_anchors.clear(); + for (const auto &peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { + if (stage_nodes.count(peer_in_anchor->GetOwnerNode()) == 0) { + peer_data_anchors.emplace_back(peer_in_anchor); + } + } + if (!peer_data_anchors.empty()) { + stage_info.data_outputs.emplace_back(std::make_pair(out_data_anchor, peer_data_anchors)); + } + } + // in ctrl nodes + for (const auto &in_ctrl_node : node->GetInControlNodes()) { + if (stage_nodes.count(in_ctrl_node) == 0) { + stage_info.ctrl_inputs.emplace_back(in_ctrl_node->GetOutControlAnchor(), node->GetInControlAnchor()); + } else { + stage_info.inner_ctrl_edges.emplace_back(std::make_pair(in_ctrl_node->GetOutControlAnchor(), + node->GetInControlAnchor())); + } + } + // out ctrl nodes + for (const auto &out_ctrl_node : node->GetOutControlNodes()) { + if (stage_nodes.count(out_ctrl_node) == 0) { + stage_info.ctrl_outputs.emplace_back(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()); + } + } + } +} + +NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info) { + OpDescBuilder op_desc_builder(graph_name, PARTITIONEDCALL); + size_t input_num = stage_info.data_inputs.size(); + for (size_t i = 0; i < input_num; i++) { + auto input_desc = stage_info.data_inputs[i].second->GetOwnerNode()->GetOpDesc(); + if (input_desc == 
nullptr) { + GELOGE(PARAM_INVALID, "op_desc is null, node: %s", + stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str()); + return nullptr; + } + op_desc_builder.AddInput("args" + std::to_string(i), + input_desc->GetInputDesc(stage_info.data_inputs[i].second->GetIdx())); + } + size_t output_num = stage_info.data_outputs.size(); + for (size_t i = 0; i < output_num; i++) { + auto output_desc = stage_info.data_outputs[i].first->GetOwnerNode()->GetOpDesc(); + if (output_desc == nullptr) { + GELOGE(PARAM_INVALID, "op_desc is null, node: %s", + stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str()); + return nullptr; + } + op_desc_builder.AddOutput("output" + std::to_string(i), + output_desc->GetOutputDesc(stage_info.data_outputs[i].first->GetIdx())); + } + + OpDescPtr op_desc = op_desc_builder.Build(); + if (op_desc == nullptr) { + GELOGE(FAILED, "Create op_desc for subgraph node failed, name:%s.", graph_name.c_str()); + return nullptr; + } + + op_desc->AddSubgraphName("f"); + op_desc->SetSubgraphInstanceName(0, graph_name); + + NodePtr subgraph_node = root_graph_->AddNode(op_desc); + if (subgraph_node == nullptr) { + GELOGE(FAILED, "Add node %s failed.", graph_name.c_str()); + return nullptr; + } + if (subgraph_node->SetOwnerComputeGraph(root_graph_) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Set owner graph for node %s failed.", subgraph_node->GetName().c_str()); + return nullptr; + } + + return subgraph_node; +} + +ComputeGraphPtr StagePartitioner::BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info) { + CompleteGraphBuilder graph_builder(subgraph_node->GetName(), false); + // Add parent node + graph_builder.SetParentNode(subgraph_node); + + // Add node + for (const auto &node : stage_info.stage_nodes) { + graph_builder.AddNode(AttrUtils::CopyOpDesc(node->GetOpDesc())); + } + + // Set Input + size_t data_input_num = stage_info.data_inputs.size(); + for (size_t i = 0; i < data_input_num; i++) { + graph_builder.SetInput(i, { 
stage_info.data_inputs[i].second->GetOwnerNode()->GetName() }, + { static_cast(stage_info.data_inputs[i].second->GetIdx()) }); + } + + // Add Outputs + size_t data_output_num = stage_info.data_outputs.size(); + for (uint32_t i = 0; i < data_output_num; i++) { + graph_builder.AddOutput(stage_info.data_outputs[i].first->GetOwnerNode()->GetName(), + stage_info.data_outputs[i].first->GetIdx()); + } + + // Add Data Edges + for (const auto &data_edge : stage_info.inner_data_edges) { + graph_builder.AddDataLink(data_edge.first->GetOwnerNode()->GetName(), data_edge.first->GetIdx(), + data_edge.second->GetOwnerNode()->GetName(), data_edge.second->GetIdx()); + } + + // Add Ctrl Edges + for (const auto &ctrl_edge : stage_info.inner_ctrl_edges) { + graph_builder.AddControlLink(ctrl_edge.first->GetOwnerNode()->GetName(), + ctrl_edge.second->GetOwnerNode()->GetName()); + } + + // Add Input-Mapping + std::map input_mapping; + for (size_t i = 0; i < data_input_num; i++) { + input_mapping[i] = i; + } + graph_builder.SetInputMapping(input_mapping); + + // Add outputMapping + std::map output_mapping; + for (size_t i = 0; i < data_output_num; i++) { + output_mapping[i] = i; + } + graph_builder.SetOutputMapping(output_mapping); + + graphStatus error_code = GRAPH_SUCCESS; + std::string error_msg; + ComputeGraphPtr subgraph = graph_builder.Build(error_code, error_msg); + if (subgraph == nullptr) { + GELOGE(error_code, "Build subgraph %s failed: %s.", subgraph_node->GetName().c_str(), error_msg.c_str()); + return nullptr; + } + if (!AttrUtils::SetInt(subgraph, ATTR_STAGE_LEVEL, stage_info.stage_level)) { + GELOGE(FAILED, "Set ATTR_STAGE_LEVEL on graph %s failed.", subgraph->GetName().c_str()); + return nullptr; + } + + return subgraph; +} + +Status StagePartitioner::RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) { + // in data nodes + for (size_t i = 0; i < stage_info.data_inputs.size(); i++) { + if 
(stage_info.data_inputs[i].first->Unlink(stage_info.data_inputs[i].second) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Remove data edge %s:%d->%s:%d failed.", + stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(), + stage_info.data_inputs[i].first->GetIdx(), + stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str(), + stage_info.data_inputs[i].second->GetIdx()); + return INTERNAL_ERROR; + } + if (stage_info.data_inputs[i].first->LinkTo(subgraph_node->GetInDataAnchor(i)) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add data edge %s:%d->%s:%zu failed.", + stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(), + stage_info.data_inputs[i].first->GetIdx(), + subgraph_node->GetName().c_str(), i); + return INTERNAL_ERROR; + } + } + // out data nodes + for (size_t i = 0; i < stage_info.data_outputs.size(); i++) { + const auto &out_data_anchor = subgraph_node->GetOutDataAnchor(i); + GE_CHECK_NOTNULL(out_data_anchor); + for (const auto &peer_in_anchor : stage_info.data_outputs[i].second) { + if (stage_info.data_outputs[i].first->Unlink(peer_in_anchor) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Remove data edge %s:%d->%s:%d failed.", + stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str(), + stage_info.data_outputs[i].first->GetIdx(), + peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); + return INTERNAL_ERROR; + } + if (out_data_anchor->LinkTo(peer_in_anchor) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add data edge %s:%zu->%s:%d failed.", subgraph_node->GetName().c_str(), i, + peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); + return INTERNAL_ERROR; + } + } + } + + return SUCCESS; +} + +Status StagePartitioner::RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) { + // in ctrl nodes + for (const auto &ctrl_input : stage_info.ctrl_inputs) { + if (ctrl_input.first->Unlink(ctrl_input.second) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, 
"Remove ctrl edge %s->%s failed.", + ctrl_input.first->GetOwnerNode()->GetName().c_str(), ctrl_input.second->GetOwnerNode()->GetName().c_str()); + return INTERNAL_ERROR; + } + if (!ctrl_input.first->IsLinkedWith(subgraph_node->GetInControlAnchor())) { + if (ctrl_input.first->LinkTo(subgraph_node->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add ctrl edge %s->%s failed.", + ctrl_input.first->GetOwnerNode()->GetName().c_str(), subgraph_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } + } + // out ctrl nodes + for (const auto &ctrl_output : stage_info.ctrl_outputs) { + if (ctrl_output.first->Unlink(ctrl_output.second) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Remove ctrl edge %s->%s failed.", + ctrl_output.first->GetOwnerNode()->GetName().c_str(), + ctrl_output.second->GetOwnerNode()->GetName().c_str()); + return INTERNAL_ERROR; + } + if (!subgraph_node->GetOutControlAnchor()->IsLinkedWith(ctrl_output.second)) { + if (subgraph_node->GetOutControlAnchor()->LinkTo(ctrl_output.second) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add ctrl edge %s->%s failed.", + subgraph_node->GetName().c_str(), ctrl_output.second->GetOwnerNode()->GetName().c_str()); + return INTERNAL_ERROR; + } + } + } + + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/partition/stage_partition.h b/ge/graph/partition/stage_partition.h new file mode 100644 index 00000000..d8364f0d --- /dev/null +++ b/ge/graph/partition/stage_partition.h @@ -0,0 +1,67 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PARTITION_STAGE_PARTITION_H_ +#define GE_GRAPH_PARTITION_STAGE_PARTITION_H_ + +#include +#include +#include +#include +#include "common/ge_inner_error_codes.h" +#include "graph/compute_graph.h" + +namespace ge { +struct StageInfo { + explicit StageInfo(uint32_t level) : stage_level(level) {} + uint32_t stage_level; + std::unordered_set stage_nodes; + std::vector> data_inputs; + std::vector>> data_outputs; + std::list> ctrl_inputs; + std::list> ctrl_outputs; + std::list> inner_data_edges; + std::list> inner_ctrl_edges; +}; + +class StagePartitioner { + public: + explicit StagePartitioner(ComputeGraphPtr graph) : root_graph_(std::move(graph)) {} + ~StagePartitioner() = default; + + Status Partition(); + + private: + Status SplitStageLevel(); + + Status StagePartition(); + + static void FindStageIO(const std::unordered_set &stage_nodes, StageInfo &stage_info); + + NodePtr BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info); + + static ComputeGraphPtr BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info); + + static Status RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); + + static Status RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); + + ComputeGraphPtr root_graph_; + std::map> stage_nodes_; +}; +} // namespace ge + +#endif // GE_GRAPH_PARTITION_STAGE_PARTITION_H_ diff --git a/ge/graph/passes/addn_pass.cc b/ge/graph/passes/addn_pass.cc index c8f820fc..c0592965 100644 --- a/ge/graph/passes/addn_pass.cc +++ 
b/ge/graph/passes/addn_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/addn_pass.h b/ge/graph/passes/addn_pass.h index 373d1842..dd44e3cd 100644 --- a/ge/graph/passes/addn_pass.h +++ b/ge/graph/passes/addn_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/aicpu_constant_folding_pass.cc b/ge/graph/passes/aicpu_constant_folding_pass.cc index ddc31079..903cff0b 100644 --- a/ge/graph/passes/aicpu_constant_folding_pass.cc +++ b/ge/graph/passes/aicpu_constant_folding_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/aicpu_constant_folding_pass.h b/ge/graph/passes/aicpu_constant_folding_pass.h index d584c392..02babd8e 100755 --- a/ge/graph/passes/aicpu_constant_folding_pass.h +++ b/ge/graph/passes/aicpu_constant_folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/assert_pass.cc b/ge/graph/passes/assert_pass.cc index 79f75f53..725016a9 100644 --- a/ge/graph/passes/assert_pass.cc +++ b/ge/graph/passes/assert_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/assert_pass.h b/ge/graph/passes/assert_pass.h index 7d8546f2..528f6046 100755 --- a/ge/graph/passes/assert_pass.h +++ b/ge/graph/passes/assert_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ - #ifndef GE_GRAPH_PASSES_ASSERT_PASS_H_ #define GE_GRAPH_PASSES_ASSERT_PASS_H_ diff --git a/ge/graph/passes/assign_pass.cc b/ge/graph/passes/assign_pass.cc index bb7a0f04..ff93efb9 100644 --- a/ge/graph/passes/assign_pass.cc +++ b/ge/graph/passes/assign_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/assign_pass.h b/ge/graph/passes/assign_pass.h index 11cf1073..d7dc5138 100644 --- a/ge/graph/passes/assign_pass.h +++ b/ge/graph/passes/assign_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 690dee27..18a81b2c 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index ad60b7b5..51050125 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index 06c32e7d..103ff7a6 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -89,16 +89,13 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { nodes.push(node); static const std::set end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; - bool merge_flag = false; - bool exit_flag = false; - bool net_output_flag = false; while (!nodes.empty()) { NodePtr cur_node = nodes.top(); nodes.pop(); if (visited.count(cur_node) > 0) { continue; } - if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) { + if (AttachFlag(cur_node, stream_label) != SUCCESS) { GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); return FAILED; } @@ -122,12 +119,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); } - bool attach_flag = (merge_flag || exit_flag) && net_output_flag; - if (attach_flag) { - GELOGI("No need to keep on attaching label."); - return SUCCESS; - } - for (const NodePtr &tmp_node : branch_nodes) { GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); @@ -140,13 +131,9 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { /// @brief attach flag /// @param [in] node /// @param [out] stream_label -/// @param [out] merge_flag -/// @param [out] exit_flag -/// @param [out] net_output_flag /// @return Status /// -Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, - bool &exit_flag, bool &net_output_flag) { +Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label) { const std::string &type = node->GetType(); if (type == STREAMSWITCH) { if (node->GetInDataNodes().empty()) { @@ -164,12 +151,8 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea } else if (type == STREAMMERGE) { stream_label = 
node->GetName(); GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); - merge_flag = true; } else if ((type == EXIT) || (type == REFEXIT)) { GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); - exit_flag = true; - } else if (type == NETOUTPUT) { - net_output_flag = true; } return SUCCESS; diff --git a/ge/graph/passes/attach_stream_label_pass.h b/ge/graph/passes/attach_stream_label_pass.h index d228134f..28e828b5 100755 --- a/ge/graph/passes/attach_stream_label_pass.h +++ b/ge/graph/passes/attach_stream_label_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,13 +50,9 @@ class AttachStreamLabelPass : public GraphPass { /// @brief attach flag /// @param [in] node /// @param [out] stream_label - /// @param [out] merge_flag - /// @param [out] exit_flag - /// @param [out] net_output_flag /// @return Status /// - static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag, - bool &net_output_flag); + static Status AttachFlag(const NodePtr &node, std::string &stream_label); /// /// @brief Update stream_label for loop_branch diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 8c808e46..f92ec409 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/base_pass.h b/ge/graph/passes/base_pass.h index bb41691d..6e7b292e 100644 --- a/ge/graph/passes/base_pass.h +++ b/ge/graph/passes/base_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/bitcast_pass.cc b/ge/graph/passes/bitcast_pass.cc index 8388b21a..ab73adcc 100644 --- a/ge/graph/passes/bitcast_pass.cc +++ b/ge/graph/passes/bitcast_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/bitcast_pass.h b/ge/graph/passes/bitcast_pass.h index 34acaf57..c77e3022 100644 --- a/ge/graph/passes/bitcast_pass.h +++ b/ge/graph/passes/bitcast_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cast_remove_pass.cc b/ge/graph/passes/cast_remove_pass.cc index 62c92866..ab4f2098 100644 --- a/ge/graph/passes/cast_remove_pass.cc +++ b/ge/graph/passes/cast_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/cast_remove_pass.h b/ge/graph/passes/cast_remove_pass.h index 0ee52998..67fa697e 100644 --- a/ge/graph/passes/cast_remove_pass.h +++ b/ge/graph/passes/cast_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cast_translate_pass.cc b/ge/graph/passes/cast_translate_pass.cc index 01b5c96b..810d2db5 100644 --- a/ge/graph/passes/cast_translate_pass.cc +++ b/ge/graph/passes/cast_translate_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cast_translate_pass.h b/ge/graph/passes/cast_translate_pass.h index 04c03d42..a802fe9e 100755 --- a/ge/graph/passes/cast_translate_pass.h +++ b/ge/graph/passes/cast_translate_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index a4662d5d..4849271b 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "common_subexpression_elimination_pass.h" #include diff --git a/ge/graph/passes/common_subexpression_elimination_pass.h b/ge/graph/passes/common_subexpression_elimination_pass.h index 83bfbace..f4439edc 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.h +++ b/ge/graph/passes/common_subexpression_elimination_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ #define GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ diff --git a/ge/graph/passes/compile_nodes_pass.cc b/ge/graph/passes/compile_nodes_pass.cc index 9faa35ae..037cc332 100755 --- a/ge/graph/passes/compile_nodes_pass.cc +++ b/ge/graph/passes/compile_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/passes/compile_nodes_pass.h" #include diff --git a/ge/graph/passes/compile_nodes_pass.h b/ge/graph/passes/compile_nodes_pass.h index e2fb59c2..70f8cbf5 100644 --- a/ge/graph/passes/compile_nodes_pass.h +++ b/ge/graph/passes/compile_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cond_pass.cc b/ge/graph/passes/cond_pass.cc index a2d77a1b..4c48359a 100644 --- a/ge/graph/passes/cond_pass.cc +++ b/ge/graph/passes/cond_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/passes/cond_pass.h" #include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" diff --git a/ge/graph/passes/cond_pass.h b/ge/graph/passes/cond_pass.h index 5c0c83bc..cf1b6207 100644 --- a/ge/graph/passes/cond_pass.h +++ b/ge/graph/passes/cond_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GRAPH_PASSES_COND_PASS_H #define GE_GRAPH_PASSES_COND_PASS_H diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc index ec26ba3e..96795ac6 100644 --- a/ge/graph/passes/cond_remove_pass.cc +++ b/ge/graph/passes/cond_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/passes/cond_remove_pass.h" #include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" diff --git a/ge/graph/passes/cond_remove_pass.h b/ge/graph/passes/cond_remove_pass.h index 72ca64b8..69dd7195 100644 --- a/ge/graph/passes/cond_remove_pass.h +++ b/ge/graph/passes/cond_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_COND_REMOVE_PASS_H #define GE_GRAPH_PASSES_COND_REMOVE_PASS_H diff --git a/ge/graph/passes/constant_folding_pass.cc b/ge/graph/passes/constant_folding_pass.cc index 4db14fc3..7bb234f1 100644 --- a/ge/graph/passes/constant_folding_pass.cc +++ b/ge/graph/passes/constant_folding_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/constant_folding_pass.h b/ge/graph/passes/constant_folding_pass.h index c977157e..232fd156 100644 --- a/ge/graph/passes/constant_folding_pass.h +++ b/ge/graph/passes/constant_folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/constant_fuse_same_pass.cc b/ge/graph/passes/constant_fuse_same_pass.cc index d0970c59..4b7b4e95 100644 --- a/ge/graph/passes/constant_fuse_same_pass.cc +++ b/ge/graph/passes/constant_fuse_same_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/constant_fuse_same_pass.h b/ge/graph/passes/constant_fuse_same_pass.h index 4935da84..876c6477 100755 --- a/ge/graph/passes/constant_fuse_same_pass.h +++ b/ge/graph/passes/constant_fuse_same_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/control_trigger_pass.cc b/ge/graph/passes/control_trigger_pass.cc index e179c64e..0c00d553 100644 --- a/ge/graph/passes/control_trigger_pass.cc +++ b/ge/graph/passes/control_trigger_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/control_trigger_pass.h b/ge/graph/passes/control_trigger_pass.h index 03ddbbd2..2cf6d171 100755 --- a/ge/graph/passes/control_trigger_pass.h +++ b/ge/graph/passes/control_trigger_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.cc b/ge/graph/passes/ctrl_edge_transfer_pass.cc index f53dc7be..b72e8039 100755 --- a/ge/graph/passes/ctrl_edge_transfer_pass.cc +++ b/ge/graph/passes/ctrl_edge_transfer_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.h b/ge/graph/passes/ctrl_edge_transfer_pass.h index 1b6a624c..ee981012 100755 --- a/ge/graph/passes/ctrl_edge_transfer_pass.h +++ b/ge/graph/passes/ctrl_edge_transfer_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. 
*/ - #ifndef GE_GRAPH_PASSES_CTRL_EDGE_TRANSFER_PASS_H_ #define GE_GRAPH_PASSES_CTRL_EDGE_TRANSFER_PASS_H_ diff --git a/ge/graph/passes/data_pass.cc b/ge/graph/passes/data_pass.cc index 38688848..7555d58a 100644 --- a/ge/graph/passes/data_pass.cc +++ b/ge/graph/passes/data_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/data_pass.h b/ge/graph/passes/data_pass.h index bce2fd5a..1f6d0f0b 100644 --- a/ge/graph/passes/data_pass.h +++ b/ge/graph/passes/data_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dimension_adjust_pass.cc b/ge/graph/passes/dimension_adjust_pass.cc index fc5fe69f..bd5dab2c 100755 --- a/ge/graph/passes/dimension_adjust_pass.cc +++ b/ge/graph/passes/dimension_adjust_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dimension_adjust_pass.h b/ge/graph/passes/dimension_adjust_pass.h index 685d9694..fa9d2320 100755 --- a/ge/graph/passes/dimension_adjust_pass.h +++ b/ge/graph/passes/dimension_adjust_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/dimension_compute_pass.cc b/ge/graph/passes/dimension_compute_pass.cc index dfa2d404..a429e69d 100755 --- a/ge/graph/passes/dimension_compute_pass.cc +++ b/ge/graph/passes/dimension_compute_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ - #include "graph/passes/dimension_compute_pass.h" #include diff --git a/ge/graph/passes/dimension_compute_pass.h b/ge/graph/passes/dimension_compute_pass.h index ba1a057c..40110757 100644 --- a/ge/graph/passes/dimension_compute_pass.h +++ b/ge/graph/passes/dimension_compute_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dropout_pass.cc b/ge/graph/passes/dropout_pass.cc index 09c297a6..ab88aa23 100644 --- a/ge/graph/passes/dropout_pass.cc +++ b/ge/graph/passes/dropout_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dropout_pass.h b/ge/graph/passes/dropout_pass.h index f127224e..506ee5d6 100755 --- a/ge/graph/passes/dropout_pass.h +++ b/ge/graph/passes/dropout_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/end_of_sequence_add_control_pass.cc b/ge/graph/passes/end_of_sequence_add_control_pass.cc index d6503d0d..90c0841c 100755 --- a/ge/graph/passes/end_of_sequence_add_control_pass.cc +++ b/ge/graph/passes/end_of_sequence_add_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/end_of_sequence_add_control_pass.h b/ge/graph/passes/end_of_sequence_add_control_pass.h index dcc65848..af60c70c 100644 --- a/ge/graph/passes/end_of_sequence_add_control_pass.h +++ b/ge/graph/passes/end_of_sequence_add_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index f19223f1..ad3d78fc 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,13 +20,14 @@ #include "framework/common/debug/log.h" #include "graph/utils/graph_utils.h" +namespace { +const size_t kOutNodesNum = 1; +} + namespace ge { Status EnterPass::Run(NodePtr &node) { GELOGD("EnterPass running"); - if (node == nullptr) { - GELOGE(PARAM_INVALID, "param [node] must not be null."); - return PARAM_INVALID; - } + GE_CHECK_NOTNULL(node); if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) { return SUCCESS; @@ -38,32 +39,68 @@ Status EnterPass::Run(NodePtr &node) { return PARAM_INVALID; } NodePtr in_node = node->GetInDataNodes().at(0); - if (in_node == nullptr) { - GELOGE(PARAM_INVALID, "param [in_node] must not be null"); - return PARAM_INVALID; - } + GE_CHECK_NOTNULL(in_node); if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) { return SUCCESS; } - bool need_remove_flag = in_node->GetInControlNodes().empty() && - node->GetInControlNodes().empty() && - node->GetOutDataNodes().empty(); - if (need_remove_flag) { + bool need_remove_flag = in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty(); + if (!need_remove_flag) { + return SUCCESS; + } + if (node->GetOutDataNodes().empty()) { for (auto &out_ctrl_node : node->GetOutControlNodes()) { if (out_ctrl_node == nullptr) { continue; } if (GraphUtils::RemoveEdge(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", - node->GetName().c_str(), out_ctrl_node->GetName().c_str()); + GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", node->GetName().c_str(), + out_ctrl_node->GetName().c_str()); return FAILED; } } + } else { + if (OptimizeEnter(node, in_node) != SUCCESS) { + GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); + return FAILED; + } } GELOGD("EnterPass success"); return SUCCESS; } + +Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { + auto out_nodes_of_in_node = in_node->GetOutAllNodes(); + if 
(out_nodes_of_in_node.size() != kOutNodesNum) { + return SUCCESS; + } + + if (!node->GetOutControlNodes().empty()) { + return SUCCESS; + } + + for (const auto &out_node : node->GetOutDataNodes()) { + GE_CHECK_NOTNULL(out_node); + if (out_node->GetType() == MERGE) { + return SUCCESS; + } + } + + GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); + auto out_data_anchor = node->GetOutDataAnchor(0); + GE_CHECK_NOTNULL(out_data_anchor); + for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); + } + + auto graph = node->GetOwnerComputeGraph(); + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node)) + AddRePassNodesWithInOut(in_node); + + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/passes/enter_pass.h b/ge/graph/passes/enter_pass.h index dc6bffb1..73702c38 100644 --- a/ge/graph/passes/enter_pass.h +++ b/ge/graph/passes/enter_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,9 @@ namespace ge { class EnterPass : public BaseNodePass { public: Status Run(NodePtr &node) override; + + private: + Status OptimizeEnter(NodePtr &node, NodePtr &in_node); }; } // namespace ge #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index 23e14b43..99d68d17 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/flow_ctrl_pass.h b/ge/graph/passes/flow_ctrl_pass.h index d01dcd44..a928aaa7 100755 --- a/ge/graph/passes/flow_ctrl_pass.h +++ b/ge/graph/passes/flow_ctrl_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/folding_pass.h b/ge/graph/passes/folding_pass.h index 745cffd7..4229755a 100755 --- a/ge/graph/passes/folding_pass.h +++ b/ge/graph/passes/folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. 
*/ - #ifndef GE_GRAPH_PASSES_FOLDING_PASS_H_ #define GE_GRAPH_PASSES_FOLDING_PASS_H_ diff --git a/ge/graph/passes/for_pass.cc b/ge/graph/passes/for_pass.cc index f3caea35..44c7a4fe 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/for_pass.h b/ge/graph/passes/for_pass.h index d6f307d1..0725f46a 100644 --- a/ge/graph/passes/for_pass.h +++ b/ge/graph/passes/for_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_FOR_PASS_H #define GE_GRAPH_PASSES_FOR_PASS_H diff --git a/ge/graph/passes/get_original_format_pass.cc b/ge/graph/passes/get_original_format_pass.cc index e743f190..31284a17 100644 --- a/ge/graph/passes/get_original_format_pass.cc +++ b/ge/graph/passes/get_original_format_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/get_original_format_pass.h b/ge/graph/passes/get_original_format_pass.h index 66e0222e..813fb2bf 100755 --- a/ge/graph/passes/get_original_format_pass.h +++ b/ge/graph/passes/get_original_format_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/global_step_insert_pass.cc b/ge/graph/passes/global_step_insert_pass.cc index 4431fc3d..fa49bdd9 100755 --- a/ge/graph/passes/global_step_insert_pass.cc +++ b/ge/graph/passes/global_step_insert_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/global_step_insert_pass.h b/ge/graph/passes/global_step_insert_pass.h index da83e93a..fb72eba3 100755 --- a/ge/graph/passes/global_step_insert_pass.h +++ b/ge/graph/passes/global_step_insert_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/guarantee_const_pass.cc b/ge/graph/passes/guarantee_const_pass.cc index a2d8f262..f099c01d 100644 --- a/ge/graph/passes/guarantee_const_pass.cc +++ b/ge/graph/passes/guarantee_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/guarantee_const_pass.h b/ge/graph/passes/guarantee_const_pass.h index 1f297944..7f289a10 100755 --- a/ge/graph/passes/guarantee_const_pass.h +++ b/ge/graph/passes/guarantee_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/hccl_group_pass.cc b/ge/graph/passes/hccl_group_pass.cc index bbfd9b56..d8f11434 100644 --- a/ge/graph/passes/hccl_group_pass.cc +++ b/ge/graph/passes/hccl_group_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/hccl_group_pass.h b/ge/graph/passes/hccl_group_pass.h index dbe15e96..de7bd20c 100644 --- a/ge/graph/passes/hccl_group_pass.h +++ b/ge/graph/passes/hccl_group_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 21747f42..d912cb2c 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index e73a5483..033fb169 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/identity_pass.cc b/ge/graph/passes/identity_pass.cc index 5a54e391..301cfe17 100755 --- a/ge/graph/passes/identity_pass.cc +++ b/ge/graph/passes/identity_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/identity_pass.h b/ge/graph/passes/identity_pass.h index a0d3f032..a4a80efc 100644 --- a/ge/graph/passes/identity_pass.h +++ b/ge/graph/passes/identity_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/infershape_pass.cc b/ge/graph/passes/infershape_pass.cc index 7b8f7b50..8130c847 100755 --- a/ge/graph/passes/infershape_pass.cc +++ b/ge/graph/passes/infershape_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/infershape_pass.h b/ge/graph/passes/infershape_pass.h index 30cf0472..9e4df9a6 100644 --- a/ge/graph/passes/infershape_pass.h +++ b/ge/graph/passes/infershape_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/input_output_connection_identify_pass.cc b/ge/graph/passes/input_output_connection_identify_pass.cc index 0d198dfb..93ede3d5 100644 --- a/ge/graph/passes/input_output_connection_identify_pass.cc +++ b/ge/graph/passes/input_output_connection_identify_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/input_output_connection_identify_pass.h b/ge/graph/passes/input_output_connection_identify_pass.h index 97ed315d..0dd32102 100755 --- a/ge/graph/passes/input_output_connection_identify_pass.h +++ b/ge/graph/passes/input_output_connection_identify_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/isolated_op_remove_pass.cc b/ge/graph/passes/isolated_op_remove_pass.cc index 5c9093e9..152104eb 100644 --- a/ge/graph/passes/isolated_op_remove_pass.cc +++ b/ge/graph/passes/isolated_op_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/isolated_op_remove_pass.h b/ge/graph/passes/isolated_op_remove_pass.h index 3b7fe7d1..f17df21a 100755 --- a/ge/graph/passes/isolated_op_remove_pass.h +++ b/ge/graph/passes/isolated_op_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/iterator_op_pass.cc b/ge/graph/passes/iterator_op_pass.cc index 1ec2bba9..3006fa50 100644 --- a/ge/graph/passes/iterator_op_pass.cc +++ b/ge/graph/passes/iterator_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/iterator_op_pass.h b/ge/graph/passes/iterator_op_pass.h index d9303358..77e80600 100644 --- a/ge/graph/passes/iterator_op_pass.h +++ b/ge/graph/passes/iterator_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/link_gen_mask_nodes_pass.cc b/ge/graph/passes/link_gen_mask_nodes_pass.cc index 9bd991aa..4f122fb2 100755 --- a/ge/graph/passes/link_gen_mask_nodes_pass.cc +++ b/ge/graph/passes/link_gen_mask_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/link_gen_mask_nodes_pass.h b/ge/graph/passes/link_gen_mask_nodes_pass.h index 12d68f1b..f9979ab1 100644 --- a/ge/graph/passes/link_gen_mask_nodes_pass.h +++ b/ge/graph/passes/link_gen_mask_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_agnostic_pass.cc b/ge/graph/passes/mark_agnostic_pass.cc index 0275bc9f..d47e0368 100644 --- a/ge/graph/passes/mark_agnostic_pass.cc +++ b/ge/graph/passes/mark_agnostic_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/passes/mark_agnostic_pass.h" #include "utils/node_utils.h" diff --git a/ge/graph/passes/mark_agnostic_pass.h b/ge/graph/passes/mark_agnostic_pass.h index 9c581abe..31a57d86 100644 --- a/ge/graph/passes/mark_agnostic_pass.h +++ b/ge/graph/passes/mark_agnostic_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_MARK_AGNOSTIC_PASS_H_ #define GE_MARK_AGNOSTIC_PASS_H_ diff --git a/ge/graph/passes/mark_graph_unknown_status_pass.cc b/ge/graph/passes/mark_graph_unknown_status_pass.cc index d8f5feff..2abec90b 100644 --- a/ge/graph/passes/mark_graph_unknown_status_pass.cc +++ b/ge/graph/passes/mark_graph_unknown_status_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_graph_unknown_status_pass.h b/ge/graph/passes/mark_graph_unknown_status_pass.h index a1148c6e..662e321c 100644 --- a/ge/graph/passes/mark_graph_unknown_status_pass.h +++ b/ge/graph/passes/mark_graph_unknown_status_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/mark_same_addr_pass.cc b/ge/graph/passes/mark_same_addr_pass.cc index 2441d0bd..0ed151d3 100644 --- a/ge/graph/passes/mark_same_addr_pass.cc +++ b/ge/graph/passes/mark_same_addr_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_same_addr_pass.h b/ge/graph/passes/mark_same_addr_pass.h index 518fe418..ebfcf6b2 100644 --- a/ge/graph/passes/mark_same_addr_pass.h +++ b/ge/graph/passes/mark_same_addr_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/memcpy_addr_async_pass.cc b/ge/graph/passes/memcpy_addr_async_pass.cc index 3ede39a7..5894725a 100755 --- a/ge/graph/passes/memcpy_addr_async_pass.cc +++ b/ge/graph/passes/memcpy_addr_async_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/memcpy_addr_async_pass.h b/ge/graph/passes/memcpy_addr_async_pass.h index 0f22d10b..a70fcbdd 100755 --- a/ge/graph/passes/memcpy_addr_async_pass.h +++ b/ge/graph/passes/memcpy_addr_async_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/merge_pass.cc b/ge/graph/passes/merge_pass.cc index 61aab4aa..0ddff827 100644 --- a/ge/graph/passes/merge_pass.cc +++ b/ge/graph/passes/merge_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/merge_pass.h b/ge/graph/passes/merge_pass.h index 53582ff6..ef586713 100755 --- a/ge/graph/passes/merge_pass.h +++ b/ge/graph/passes/merge_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index 0ff05c23..f6dc8459 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/merge_to_stream_merge_pass.h b/ge/graph/passes/merge_to_stream_merge_pass.h index 6eb2b22c..9f713989 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.h +++ b/ge/graph/passes/merge_to_stream_merge_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index 732844e5..361276c9 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h index 1155dfc8..454aff41 100755 --- a/ge/graph/passes/multi_batch_clone_pass.h +++ b/ge/graph/passes/multi_batch_clone_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/multi_batch_pass.cc b/ge/graph/passes/multi_batch_pass.cc index 70a09065..3638f8a0 100644 --- a/ge/graph/passes/multi_batch_pass.cc +++ b/ge/graph/passes/multi_batch_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/multi_batch_pass.h b/ge/graph/passes/multi_batch_pass.h index a714992a..0a3970ee 100644 --- a/ge/graph/passes/multi_batch_pass.h +++ b/ge/graph/passes/multi_batch_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc index e3f2b71a..9b2f2723 100644 --- a/ge/graph/passes/net_output_pass.cc +++ b/ge/graph/passes/net_output_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/net_output_pass.h b/ge/graph/passes/net_output_pass.h index b959bd96..567d1246 100644 --- a/ge/graph/passes/net_output_pass.h +++ b/ge/graph/passes/net_output_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index 5cd0f29f..73b3b77e 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index f8223c20..6f28a618 100755 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/no_use_reshape_remove_pass.cc b/ge/graph/passes/no_use_reshape_remove_pass.cc index 66a798a5..07f58417 100644 --- a/ge/graph/passes/no_use_reshape_remove_pass.cc +++ b/ge/graph/passes/no_use_reshape_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/no_use_reshape_remove_pass.h b/ge/graph/passes/no_use_reshape_remove_pass.h index c142d8d2..7ca36807 100755 --- a/ge/graph/passes/no_use_reshape_remove_pass.h +++ b/ge/graph/passes/no_use_reshape_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/parallel_concat_start_op_pass.cc b/ge/graph/passes/parallel_concat_start_op_pass.cc index 508d9b19..0ac26b91 100755 --- a/ge/graph/passes/parallel_concat_start_op_pass.cc +++ b/ge/graph/passes/parallel_concat_start_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/parallel_concat_start_op_pass.h b/ge/graph/passes/parallel_concat_start_op_pass.h index db9d235a..0f6e754a 100755 --- a/ge/graph/passes/parallel_concat_start_op_pass.h +++ b/ge/graph/passes/parallel_concat_start_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/pass_manager.cc b/ge/graph/passes/pass_manager.cc index 59ede66b..b79b18ea 100644 --- a/ge/graph/passes/pass_manager.cc +++ b/ge/graph/passes/pass_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/pass_utils.cc b/ge/graph/passes/pass_utils.cc index 5359ff63..a920b4d9 100644 --- a/ge/graph/passes/pass_utils.cc +++ b/ge/graph/passes/pass_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/pass_utils.h b/ge/graph/passes/pass_utils.h index fbfb3b47..5ccfb585 100755 --- a/ge/graph/passes/pass_utils.h +++ b/ge/graph/passes/pass_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/permute_pass.cc b/ge/graph/passes/permute_pass.cc index 73d9a7f1..5e237346 100644 --- a/ge/graph/passes/permute_pass.cc +++ b/ge/graph/passes/permute_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/permute_pass.h b/ge/graph/passes/permute_pass.h index 9c4b911e..e4415b6e 100755 --- a/ge/graph/passes/permute_pass.h +++ b/ge/graph/passes/permute_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/placeholder_with_default_pass.cc b/ge/graph/passes/placeholder_with_default_pass.cc index 4c902322..7a72fc36 100644 --- a/ge/graph/passes/placeholder_with_default_pass.cc +++ b/ge/graph/passes/placeholder_with_default_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/placeholder_with_default_pass.h b/ge/graph/passes/placeholder_with_default_pass.h index f2b26933..d48a0a5a 100644 --- a/ge/graph/passes/placeholder_with_default_pass.h +++ b/ge/graph/passes/placeholder_with_default_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/prevent_gradient_pass.cc b/ge/graph/passes/prevent_gradient_pass.cc index 402529c3..87c1b3a1 100644 --- a/ge/graph/passes/prevent_gradient_pass.cc +++ b/ge/graph/passes/prevent_gradient_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prevent_gradient_pass.h b/ge/graph/passes/prevent_gradient_pass.h index f1542c22..8fe02b96 100755 --- a/ge/graph/passes/prevent_gradient_pass.h +++ b/ge/graph/passes/prevent_gradient_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/print_op_pass.cc b/ge/graph/passes/print_op_pass.cc index 28b2332b..fba7b712 100755 --- a/ge/graph/passes/print_op_pass.cc +++ b/ge/graph/passes/print_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/print_op_pass.h b/ge/graph/passes/print_op_pass.h index deaf559b..15b0badc 100755 --- a/ge/graph/passes/print_op_pass.h +++ b/ge/graph/passes/print_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/prune_pass.cc b/ge/graph/passes/prune_pass.cc index f5f4cbcb..af10c54f 100644 --- a/ge/graph/passes/prune_pass.cc +++ b/ge/graph/passes/prune_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prune_pass.h b/ge/graph/passes/prune_pass.h index c8cf8247..4bc6f184 100755 --- a/ge/graph/passes/prune_pass.h +++ b/ge/graph/passes/prune_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/ref_identity_delete_op_pass.cc b/ge/graph/passes/ref_identity_delete_op_pass.cc index 95f710f2..5bc0fad6 100644 --- a/ge/graph/passes/ref_identity_delete_op_pass.cc +++ b/ge/graph/passes/ref_identity_delete_op_pass.cc @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #include "ref_identity_delete_op_pass.h" #include diff --git a/ge/graph/passes/ref_identity_delete_op_pass.h b/ge/graph/passes/ref_identity_delete_op_pass.h index 8363528e..3e42def4 100644 --- a/ge/graph/passes/ref_identity_delete_op_pass.h +++ b/ge/graph/passes/ref_identity_delete_op_pass.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef GE_GRAPH_PASSES_REF_IDENTITY_DELETE_OP_PASS_H_ #define GE_GRAPH_PASSES_REF_IDENTITY_DELETE_OP_PASS_H_ diff --git a/ge/graph/passes/remove_nodes_pass.cc b/ge/graph/passes/remove_nodes_pass.cc index c238f003..b29d6af3 100644 --- a/ge/graph/passes/remove_nodes_pass.cc +++ b/ge/graph/passes/remove_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "remove_nodes_pass.h" #include "debug/ge_log.h" #include "inc/framework/common/util.h" diff --git a/ge/graph/passes/remove_nodes_pass.h b/ge/graph/passes/remove_nodes_pass.h index 1d4fced9..fdf71fa3 100644 --- a/ge/graph/passes/remove_nodes_pass.h +++ b/ge/graph/passes/remove_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_REMOVE_NODES_PASS_H_ #define GE_REMOVE_NODES_PASS_H_ #include "graph/passes/base_pass.h" diff --git a/ge/graph/passes/replace_transshape_pass.cc b/ge/graph/passes/replace_transshape_pass.cc index 9004df4e..66c3f07a 100644 --- a/ge/graph/passes/replace_transshape_pass.cc +++ b/ge/graph/passes/replace_transshape_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/replace_transshape_pass.h b/ge/graph/passes/replace_transshape_pass.h index 0620ed2d..6673b11d 100644 --- a/ge/graph/passes/replace_transshape_pass.h +++ b/ge/graph/passes/replace_transshape_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/replace_with_empty_const_pass.cc b/ge/graph/passes/replace_with_empty_const_pass.cc index 171c76d0..278293b2 100644 --- a/ge/graph/passes/replace_with_empty_const_pass.cc +++ b/ge/graph/passes/replace_with_empty_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/replace_with_empty_const_pass.h b/ge/graph/passes/replace_with_empty_const_pass.h index 5083c699..495b75b3 100644 --- a/ge/graph/passes/replace_with_empty_const_pass.h +++ b/ge/graph/passes/replace_with_empty_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc index f0987ff5..d2bc7971 100644 --- a/ge/graph/passes/reshape_recovery_pass.cc +++ b/ge/graph/passes/reshape_recovery_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/passes/reshape_recovery_pass.h" #include "common/ge/ge_util.h" diff --git a/ge/graph/passes/reshape_recovery_pass.h b/ge/graph/passes/reshape_recovery_pass.h index f16d5efb..5733dc8f 100644 --- a/ge/graph/passes/reshape_recovery_pass.h +++ b/ge/graph/passes/reshape_recovery_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_RESHAPE_RECOVERY_PASS_H_ #define GE_RESHAPE_RECOVERY_PASS_H_ #include "inc/graph_pass.h" diff --git a/ge/graph/passes/reshape_remove_pass.cc b/ge/graph/passes/reshape_remove_pass.cc index ffa6a485..a39298b6 100755 --- a/ge/graph/passes/reshape_remove_pass.cc +++ b/ge/graph/passes/reshape_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/reshape_remove_pass.h b/ge/graph/passes/reshape_remove_pass.h index c89caf86..044bbdb7 100644 --- a/ge/graph/passes/reshape_remove_pass.h +++ b/ge/graph/passes/reshape_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/resource_pair_add_control_pass.cc b/ge/graph/passes/resource_pair_add_control_pass.cc index 432bff9e..bba8ee71 100755 --- a/ge/graph/passes/resource_pair_add_control_pass.cc +++ b/ge/graph/passes/resource_pair_add_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_add_control_pass.h b/ge/graph/passes/resource_pair_add_control_pass.h index 5e1a4465..02ebd78f 100644 --- a/ge/graph/passes/resource_pair_add_control_pass.h +++ b/ge/graph/passes/resource_pair_add_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_remove_control_pass.cc b/ge/graph/passes/resource_pair_remove_control_pass.cc index 83fc7081..00d97798 100755 --- a/ge/graph/passes/resource_pair_remove_control_pass.cc +++ b/ge/graph/passes/resource_pair_remove_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/resource_pair_remove_control_pass.h b/ge/graph/passes/resource_pair_remove_control_pass.h index 80f6b3ef..ab40b130 100644 --- a/ge/graph/passes/resource_pair_remove_control_pass.h +++ b/ge/graph/passes/resource_pair_remove_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/same_transdata_breadth_fusion_pass.cc b/ge/graph/passes/same_transdata_breadth_fusion_pass.cc index 5709dcb7..5ad20fae 100644 --- a/ge/graph/passes/same_transdata_breadth_fusion_pass.cc +++ b/ge/graph/passes/same_transdata_breadth_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/same_transdata_breadth_fusion_pass.h b/ge/graph/passes/same_transdata_breadth_fusion_pass.h index 92e559a0..03b65ecd 100755 --- a/ge/graph/passes/same_transdata_breadth_fusion_pass.h +++ b/ge/graph/passes/same_transdata_breadth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/save_pass.cc b/ge/graph/passes/save_pass.cc index a2e34b1d..6215e3e4 100755 --- a/ge/graph/passes/save_pass.cc +++ b/ge/graph/passes/save_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/save_pass.h b/ge/graph/passes/save_pass.h index 512dfa62..ce8c8a7a 100755 --- a/ge/graph/passes/save_pass.h +++ b/ge/graph/passes/save_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/set_input_output_offset_pass.cc b/ge/graph/passes/set_input_output_offset_pass.cc index beac831c..7f86947f 100644 --- a/ge/graph/passes/set_input_output_offset_pass.cc +++ b/ge/graph/passes/set_input_output_offset_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/set_input_output_offset_pass.h b/ge/graph/passes/set_input_output_offset_pass.h index 2e5ddaca..24f9f6c4 100644 --- a/ge/graph/passes/set_input_output_offset_pass.h +++ b/ge/graph/passes/set_input_output_offset_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GRAPH_PASSES_SET_INPUT_OUTPUT_OFFSET_PASS_H_ #define GE_GRAPH_PASSES_SET_INPUT_OUTPUT_OFFSET_PASS_H_ diff --git a/ge/graph/passes/shape_operate_op_remove_pass.cc b/ge/graph/passes/shape_operate_op_remove_pass.cc index 9d6d7c40..5a6e5f99 100755 --- a/ge/graph/passes/shape_operate_op_remove_pass.cc +++ b/ge/graph/passes/shape_operate_op_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/shape_operate_op_remove_pass.h b/ge/graph/passes/shape_operate_op_remove_pass.h index 22892140..3abe68e5 100755 --- a/ge/graph/passes/shape_operate_op_remove_pass.h +++ b/ge/graph/passes/shape_operate_op_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/snapshot_pass.cc b/ge/graph/passes/snapshot_pass.cc index 2b578e51..702cf4de 100644 --- a/ge/graph/passes/snapshot_pass.cc +++ b/ge/graph/passes/snapshot_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/snapshot_pass.h b/ge/graph/passes/snapshot_pass.h index 3063ac3a..94062b3d 100644 --- a/ge/graph/passes/snapshot_pass.h +++ b/ge/graph/passes/snapshot_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/stop_gradient_pass.cc b/ge/graph/passes/stop_gradient_pass.cc index 223e4513..bd5c0ea8 100644 --- a/ge/graph/passes/stop_gradient_pass.cc +++ b/ge/graph/passes/stop_gradient_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/stop_gradient_pass.h b/ge/graph/passes/stop_gradient_pass.h index 808174bc..5b6e0e9e 100755 --- a/ge/graph/passes/stop_gradient_pass.h +++ b/ge/graph/passes/stop_gradient_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/subexpression_migration_pass.cc b/ge/graph/passes/subexpression_migration_pass.cc index d2831f5c..8bef42ef 100755 --- a/ge/graph/passes/subexpression_migration_pass.cc +++ b/ge/graph/passes/subexpression_migration_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "subexpression_migration_pass.h" #include "graph/utils/node_utils.h" diff --git a/ge/graph/passes/subexpression_migration_pass.h b/ge/graph/passes/subexpression_migration_pass.h index d2733fcf..e66a3ac7 100755 --- a/ge/graph/passes/subexpression_migration_pass.h +++ b/ge/graph/passes/subexpression_migration_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index d88fb878..b5e28940 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "subgraph_const_migration_pass.h" #include "graph/utils/node_utils.h" @@ -187,10 +188,14 @@ Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &grap } } + auto iter = graph_datas.begin(); + if (iter == graph_datas.end()) { + return SUCCESS; + } for (const auto &data_nodes : graph_datas) { - if (data_nodes.second.size() != graph_datas.begin()->second.size()) { + if (data_nodes.second.size() != iter->second.size()) { GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", - data_nodes.first->GetName().c_str(), data_nodes.second.size(), graph_datas.begin()->second.size()); + data_nodes.first->GetName().c_str(), data_nodes.second.size(), iter->second.size()); return FAILED; } } diff --git a/ge/graph/passes/subgraph_const_migration_pass.h b/ge/graph/passes/subgraph_const_migration_pass.h index 3c087852..cb18fde9 100755 --- a/ge/graph/passes/subgraph_const_migration_pass.h +++ b/ge/graph/passes/subgraph_const_migration_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index 04e28aaf..5d65c4f3 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/subgraph_pass.h b/ge/graph/passes/subgraph_pass.h index 6e518ace..7ff2019f 100644 --- a/ge/graph/passes/subgraph_pass.h +++ b/ge/graph/passes/subgraph_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_data_edges_bypass.cc b/ge/graph/passes/switch_data_edges_bypass.cc index ce2b715b..5a297d4a 100644 --- a/ge/graph/passes/switch_data_edges_bypass.cc +++ b/ge/graph/passes/switch_data_edges_bypass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_data_edges_bypass.h b/ge/graph/passes/switch_data_edges_bypass.h index 25f71d20..0d53c24b 100644 --- a/ge/graph/passes/switch_data_edges_bypass.h +++ b/ge/graph/passes/switch_data_edges_bypass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_SWITCH_DATA_EDGES_BYPASS_H_ #define GE_SWITCH_DATA_EDGES_BYPASS_H_ diff --git a/ge/graph/passes/switch_dead_branch_elimination.cc b/ge/graph/passes/switch_dead_branch_elimination.cc index 9358c9c3..dd7ace60 100644 --- a/ge/graph/passes/switch_dead_branch_elimination.cc +++ b/ge/graph/passes/switch_dead_branch_elimination.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_dead_branch_elimination.h b/ge/graph/passes/switch_dead_branch_elimination.h index fdefb5c0..e7e36040 100644 --- a/ge/graph/passes/switch_dead_branch_elimination.h +++ b/ge/graph/passes/switch_dead_branch_elimination.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_SWITCH_DEAD_BRANCH_ELIMINATION_H_ #define GE_GRAPH_PASSES_SWITCH_DEAD_BRANCH_ELIMINATION_H_ diff --git a/ge/graph/passes/switch_logic_remove_pass.cc b/ge/graph/passes/switch_logic_remove_pass.cc index a6758e86..debb8b34 100644 --- a/ge/graph/passes/switch_logic_remove_pass.cc +++ b/ge/graph/passes/switch_logic_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/switch_logic_remove_pass.h b/ge/graph/passes/switch_logic_remove_pass.h index dc679978..5c523b83 100644 --- a/ge/graph/passes/switch_logic_remove_pass.h +++ b/ge/graph/passes/switch_logic_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_SWITCH_LOGIC_REMOVE_PASS_H_ #define GE_GRAPH_PASSES_SWITCH_LOGIC_REMOVE_PASS_H_ #include "graph/passes/base_pass.h" diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index 529480a6..86483d88 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h index 48725230..15fe9dce 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.h +++ b/ge/graph/passes/switch_to_stream_switch_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index 21fb1eaf..046d171e 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transop_breadth_fusion_pass.h b/ge/graph/passes/transop_breadth_fusion_pass.h index 9a82259c..8e7799e1 100755 --- a/ge/graph/passes/transop_breadth_fusion_pass.h +++ b/ge/graph/passes/transop_breadth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transop_depth_fusion_pass.cc b/ge/graph/passes/transop_depth_fusion_pass.cc index 85106e08..057858f5 100755 --- a/ge/graph/passes/transop_depth_fusion_pass.cc +++ b/ge/graph/passes/transop_depth_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transop_depth_fusion_pass.h b/ge/graph/passes/transop_depth_fusion_pass.h index 831e7138..cc449893 100755 --- a/ge/graph/passes/transop_depth_fusion_pass.h +++ b/ge/graph/passes/transop_depth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc index b207abe9..c385662b 100644 --- a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc +++ b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h index 0cacf062..1cd1eeec 100755 --- a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h +++ b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transop_symmetry_elimination_pass.cc b/ge/graph/passes/transop_symmetry_elimination_pass.cc index 9db3aea1..3f1be84f 100644 --- a/ge/graph/passes/transop_symmetry_elimination_pass.cc +++ b/ge/graph/passes/transop_symmetry_elimination_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_symmetry_elimination_pass.h b/ge/graph/passes/transop_symmetry_elimination_pass.h index 3a80ada5..92354391 100644 --- a/ge/graph/passes/transop_symmetry_elimination_pass.h +++ b/ge/graph/passes/transop_symmetry_elimination_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_SYMMETRY_ELIMINATION_PASS_H #define GE_SYMMETRY_ELIMINATION_PASS_H diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/ge/graph/passes/transop_without_reshape_fusion_pass.cc index c1eaf0f9..61bca6b8 100644 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.cc +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/passes/transop_without_reshape_fusion_pass.h" #include #include diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.h b/ge/graph/passes/transop_without_reshape_fusion_pass.h index 2aa2d0f7..326da001 100755 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.h +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ #define GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ diff --git a/ge/graph/passes/transpose_transdata_pass.cc b/ge/graph/passes/transpose_transdata_pass.cc index 19bff563..b9bd59be 100644 --- a/ge/graph/passes/transpose_transdata_pass.cc +++ b/ge/graph/passes/transpose_transdata_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/transpose_transdata_pass.h b/ge/graph/passes/transpose_transdata_pass.h index a72893f6..bf42f5de 100644 --- a/ge/graph/passes/transpose_transdata_pass.h +++ b/ge/graph/passes/transpose_transdata_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_args_clean_pass.cc b/ge/graph/passes/unused_args_clean_pass.cc index 83fd0438..6822b0b8 100755 --- a/ge/graph/passes/unused_args_clean_pass.cc +++ b/ge/graph/passes/unused_args_clean_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "unused_args_clean_pass.h" #include "graph/utils/node_utils.h" diff --git a/ge/graph/passes/unused_args_clean_pass.h b/ge/graph/passes/unused_args_clean_pass.h index 90a146b2..851115d9 100644 --- a/ge/graph/passes/unused_args_clean_pass.h +++ b/ge/graph/passes/unused_args_clean_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_COMMON_CASE_ARGS_CLEAN_H_ #define GE_COMMON_CASE_ARGS_CLEAN_H_ diff --git a/ge/graph/passes/unused_const_pass.cc b/ge/graph/passes/unused_const_pass.cc index 7c57c53e..386633b5 100644 --- a/ge/graph/passes/unused_const_pass.cc +++ b/ge/graph/passes/unused_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_const_pass.h b/ge/graph/passes/unused_const_pass.h index 6b99f058..3c7f3460 100755 --- a/ge/graph/passes/unused_const_pass.h +++ b/ge/graph/passes/unused_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/unused_op_remove_pass.cc b/ge/graph/passes/unused_op_remove_pass.cc index 41f7c828..6cceccb0 100644 --- a/ge/graph/passes/unused_op_remove_pass.cc +++ b/ge/graph/passes/unused_op_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_op_remove_pass.h b/ge/graph/passes/unused_op_remove_pass.h index b9429cfd..bbc43af5 100755 --- a/ge/graph/passes/unused_op_remove_pass.h +++ b/ge/graph/passes/unused_op_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/var_is_initialized_op_pass.cc b/ge/graph/passes/var_is_initialized_op_pass.cc index b9c752d8..a9b7f35e 100644 --- a/ge/graph/passes/var_is_initialized_op_pass.cc +++ b/ge/graph/passes/var_is_initialized_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/var_is_initialized_op_pass.h b/ge/graph/passes/var_is_initialized_op_pass.h index 9cfa7b99..37b3f49b 100755 --- a/ge/graph/passes/var_is_initialized_op_pass.h +++ b/ge/graph/passes/var_is_initialized_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_format_pass.cc b/ge/graph/passes/variable_format_pass.cc index bd5300a5..28f6a4f7 100644 --- a/ge/graph/passes/variable_format_pass.cc +++ b/ge/graph/passes/variable_format_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_format_pass.h b/ge/graph/passes/variable_format_pass.h index e2c32903..1a0abe2e 100755 --- a/ge/graph/passes/variable_format_pass.h +++ b/ge/graph/passes/variable_format_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_op_pass.cc b/ge/graph/passes/variable_op_pass.cc index f1843d94..cca03ddc 100644 --- a/ge/graph/passes/variable_op_pass.cc +++ b/ge/graph/passes/variable_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_op_pass.h b/ge/graph/passes/variable_op_pass.h index 3b18882c..e17980e9 100755 --- a/ge/graph/passes/variable_op_pass.h +++ b/ge/graph/passes/variable_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc new file mode 100644 index 00000000..ae9f646f --- /dev/null +++ b/ge/graph/passes/variable_op_pass_bak.cc @@ -0,0 +1,812 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/variable_op_pass_bak.h" +#include +#include + +#include "common/formats/formats.h" +#include "common/formats/utils/formats_trans_utils.h" +#include "graph/ge_context.h" +#include "graph/graph.h" +#include "graph/manager/graph_var_manager.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" + +namespace ge { +namespace { +const int kTransOpOutIndex = 0; + +Status ByPassTransNode(NodePtr &front_node, NodePtr &back_node) { + GE_CHECK_NOTNULL(front_node); + GE_CHECK_NOTNULL(back_node); + GELOGD("Begin to bypass trans node %s", front_node->GetName().c_str()); + auto ret = GraphUtils::CopyInCtrlEdges(front_node, back_node); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, + "Failed to move control edges from trans " + "node %s to var-ref %s", + front_node->GetName().c_str(), back_node->GetName().c_str()); + return INTERNAL_ERROR; + } + auto back_node_in_anchor = back_node->GetInDataAnchor(0); + if (back_node_in_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, + "The back node %s does not have an " + "input anchor", + back_node->GetName().c_str()); 
+ return INTERNAL_ERROR; + } + back_node_in_anchor->UnlinkAll(); + auto trans_in_anchor = front_node->GetInDataAnchor(0); + if (trans_in_anchor == nullptr) { + GELOGE(INTERNAL_ERROR, + "Failed to get the in data anchor from trans" + " node %s type %s", + front_node->GetName().c_str(), front_node->GetType().c_str()); + return INTERNAL_ERROR; + } + auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); + if (prev_trans_node_out_anchor == nullptr) { + GELOGW( + "The trans node %s does not have an input, so the ref node %s does" + " not have any inputs after bypass", + front_node->GetName().c_str(), front_node->GetName().c_str()); + } else { + ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, back_node_in_anchor); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, + "Failed to add edge between ref node %s " + "and the prev node of trans node %s", + back_node->GetName().c_str(), front_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } + return SUCCESS; +} + +bool IsTransSupport(const TransNodeInfo &trans_info) { + if (trans_info.output.GetShape().IsUnknownShape()) { + return false; + } + if (trans_info.node_type == RESHAPE || trans_info.node_type == REFORMAT) { + return true; + } else if (trans_info.node_type == TRANSDATA || trans_info.node_type == TRANSPOSED) { + formats::TransArgs args{nullptr, + trans_info.input.GetFormat(), + trans_info.output.GetFormat(), + trans_info.input.GetShape().GetDims(), + trans_info.output.GetShape().GetDims(), + trans_info.input.GetDataType()}; + return formats::IsTransFormatSupport(args); + } else if (trans_info.node_type == CAST) { + formats::CastArgs datatype_args{nullptr, static_cast(trans_info.input.GetShape().GetShapeSize()), + trans_info.input.GetDataType(), trans_info.output.GetDataType()}; + return formats::IsTransDataTypeSupport(datatype_args); + } else { + return false; + } +} + +std::string GetInAndOutDecsDiff(NodePtr &trans_node, bool reverse = false) { + int tran_in_index = 
TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); + auto op_desc = trans_node->GetOpDesc(); + GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); + GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); + if (reverse) { + GeTensorDesc tmp_desc = input_desc; + input_desc = output_desc; + output_desc = tmp_desc; + } + auto input_format = input_desc.GetFormat(); + auto input_type = input_desc.GetDataType(); + auto input_shape = input_desc.GetShape(); + auto output_format = output_desc.GetFormat(); + auto output_type = output_desc.GetDataType(); + auto output_shape = output_desc.GetShape(); + std::stringstream diff_key; + diff_key.str(""); + if (input_format != output_format) { + diff_key << static_cast(input_format) << '-' << static_cast(output_format) << '-'; + } else { + diff_key << "*-"; + } + if (input_type != output_type) { + diff_key << static_cast(input_type) << '-' << static_cast(output_type) << '-'; + } else { + diff_key << "*-"; + } + if (!ge::formats::IsShapeEqual(input_shape, output_shape)) { + for (auto dim : input_shape.GetDims()) { + diff_key << dim << '-'; + } + for (auto dim : output_shape.GetDims()) { + diff_key << dim << '-'; + } + } else { + diff_key << "*"; + } + return diff_key.str(); +} +} // namespace + +Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { + if (graph == nullptr) { + GELOGE(INTERNAL_ERROR, "Failed to run variable op pass, null graph"); + return INTERNAL_ERROR; + } + + GELOGD("Begin to run variable op pass on graph %s, session %lu, graph id %u", graph->GetName().c_str(), + GetContext().SessionId(), graph->GetGraphID()); + + if (var_accelerate_ctrl_ == nullptr) { + GELOGE(INTERNAL_ERROR, "Failed to run var op pass, the variable accelerate control is null"); + return INTERNAL_ERROR; + } + + GELOGD("Begin to generate ref map for variable and refs, graph name:%s.", graph->GetName().c_str()); + if (RenewVarDesc(graph) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to renew var desc on graph"); + 
return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + + if (GenerateVariableVariableRefMap(graph) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + + GELOGD("Begin to fusion variables and trans nodes"); + for (auto &var_to_refs : var_and_var_ref_map_) { + auto &node = var_to_refs.first; + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(var_accelerate_ctrl_); + if (!var_accelerate_ctrl_->IsVarPermitToChangeFormats(node->GetName())) { + GELOGD("The var %s does not permit to change formats, skip it", node->GetName().c_str()); + continue; + } + + VarTransRoad fusion_road; + auto ret = FusionIfNeed(node, fusion_road); + if (ret != SUCCESS) { + return ret; + } + + if (fusion_road.empty()) { + GELOGD("No need to fusion variable %s because it's fusion road is empty", node->GetName().c_str()); + continue; + } + + ret = RenewTransRoadDesc(node, fusion_road); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to renew description fusion road for var %s", node->GetName().c_str()); + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + + auto start_iter = fusion_road.begin(); + auto end_iter = fusion_road.rbegin(); + GELOGD( + "Trans variable data for %s from format %s to %s, shape %s to %s " + "data-type %s to %s, path len %zu success", + node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), + TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), + formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), + formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), + TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), + TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); + + ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to 
update the format fusion road for var %s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph->GetGraphID()); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to update the graph id for var %s", node->GetName().c_str()); + return INTERNAL_ERROR; + } + var_accelerate_ctrl_->SetVarChanged(node->GetName()); + + GELOGD("Begin to update format info for var %s.", node->GetName().c_str()); + std::set node_set({node}); + if (UpdateIOFormatInfo(end_iter->output, node_set) != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + + // renew var desc if the trans_road is all reshape or reformat + ret = RenewVarDesc(graph->GetSessionID(), node, fusion_road); + if (ret != SUCCESS) { + GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); + return FAILED; + } + } + + return SUCCESS; +} + +Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road) { + auto var_desc = var->GetOpDesc(); + GE_CHECK_NOTNULL(var_desc); + TransNodeInfo prev_node_info; + prev_node_info.node_type = var->GetType(); + prev_node_info.output = var_desc->GetOutputDesc(0); + // two cases + // fisrt Var->cast->transdata which transdata in fusion road + // the input of transdata is not equal with output of var + // case 1 : suppose input dtype of transdata equal with out dtype + // but not equal with var + // so we make input dtype and output dytpe of transroad equal with var + // case 2: suppose input format of transdata not equal with out format + // and input format not equal with var + // so we make input format equal with var + + for (auto &cur_trans : fusion_road) { + if (cur_trans.input.GetFormat() == cur_trans.output.GetFormat()) { + cur_trans.output.SetFormat(prev_node_info.output.GetFormat()); + } + if (cur_trans.input.GetDataType() == cur_trans.output.GetDataType()) { + 
cur_trans.output.SetDataType(prev_node_info.output.GetDataType()); + } + if (ge::formats::IsShapeEqual(cur_trans.input.GetShape(), cur_trans.output.GetShape())) { + cur_trans.output.SetShape(prev_node_info.output.GetShape()); + } + cur_trans.input = prev_node_info.output; + prev_node_info.output = cur_trans.output; + } + return SUCCESS; +} + +Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road) { + bool can_fusion = false; + while (true) { + map> trans_type_to_trans_ops ; + map> trans_type_to_changed_desc; + // record the order of trans op in first path + vector first_path_trans_order; + auto ret = CheckIfCouldBeOptimized(var, first_path_trans_order, trans_type_to_changed_desc, + trans_type_to_trans_ops, can_fusion); + if (ret != SUCCESS) { + GELOGE(FAILED, "Check trans ops after vatiable could be optimized or not failed"); + return ret; + } + + if (!can_fusion) { + break; + } + + vector> delete_var_ref_trans_nodes; + ret = GetAndCheckTransOpOfVarRef(var, can_fusion, trans_type_to_changed_desc, delete_var_ref_trans_nodes); + if (ret != SUCCESS) { + GELOGE(FAILED, "get and check trans op of varref failed"); + return ret; + } + + if (!can_fusion) { + break; + } + + ret = UpdateTransRoad(fusion_road, first_path_trans_order, + trans_type_to_changed_desc, trans_type_to_trans_ops); + if (ret != SUCCESS) { + GELOGE(FAILED, "Update trans road failed"); + return ret; + } + + if (fusion_road.empty()) { + return SUCCESS; + } + + ret = DealFusion(var, fusion_road, trans_type_to_changed_desc, + trans_type_to_trans_ops, delete_var_ref_trans_nodes); + if (ret != SUCCESS) { + return ret; + } + } + return SUCCESS; +} + +Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector &first_path_trans_order, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops){ + vector delete_trans_type; + for (auto &trans_type : first_path_trans_order) { + if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) { + 
continue; + } + bool delete_flag = false; + for (auto &trans_node : trans_type_to_trans_ops[trans_type]) { + int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); + auto out_op_desc = trans_node->GetOpDesc(); + GE_CHECK_NOTNULL(out_op_desc); + TransNodeInfo trans_node_info; + trans_node_info.node_type = trans_node->GetType(); + trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); + trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); + if (!IsTransSupport(trans_node_info)) { + delete_flag = true; + GELOGD("The trans node %s does not support, skip the variable accelerating", trans_node_info.node_type.c_str()); + break; + } + } + if (delete_flag) { + delete_trans_type.push_back(trans_type); + } else { + auto &trans_node = *trans_type_to_trans_ops[trans_type].begin(); + auto out_op_desc = trans_node->GetOpDesc(); + int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); + TransNodeInfo trans_node_info; + trans_node_info.node_type = trans_node->GetType(); + trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); + trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); + fusion_road.emplace_back(trans_node_info); + } + } + for (auto &trans_type : delete_trans_type) { + trans_type_to_changed_desc.erase(trans_type); + } + return SUCCESS; +} + +Status VariableOpPass::DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, + map> trans_type_to_changed_desc, + map> trans_type_to_trans_ops, + vector> &delete_trans_nodes) { + GE_CHECK_NOTNULL(var_node); + GELOGD("Begin to fusion var %s with trans", var_node->GetName().c_str()); + auto graph = var_node->GetOwnerComputeGraph(); + for (auto &trans_type : trans_type_to_changed_desc) { + for (auto &trans_node : trans_type_to_trans_ops[trans_type.first]) { + GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), + trans_node->GetType().c_str(), var_node->GetName().c_str()); + if 
(RenewTransOpDesc(trans_node, true) != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + + if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + + if (GraphUtils::RemoveNodeWithoutRelink(graph, trans_node) != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + } + } + + // Iterate delete_trans_nodes backward, eg a->b->c, delete_trans_nodes:{{b,c},{a,b}} + // we should delete {a,b} first , then b->c,then we can delete {b,c} + // if we delete {b,c} first, then a->c, then we can not get b when we delete {a,b} + for (auto iter = delete_trans_nodes.rbegin(); iter != delete_trans_nodes.rend(); ++iter) { + auto front_node = iter->first; + auto back_node = iter->second; + if (RenewTransOpDesc(front_node, false) != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + if (front_node->GetOutDataNodes().size() > 1) { + GELOGD("The trans node %s type %s connecting with var-ref %s has more" + " than one output data nodes, unlink the edge between them", + front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); + if (ByPassTransNode(front_node, back_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to node %s", front_node->GetName().c_str(), + back_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } else { + GELOGD("The trans node %s type %s connecting with %s has only" + " one output data nodes, isolate and remove it.", + front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); + if (GraphUtils::IsolateNode(front_node, {0}) != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + if (GraphUtils::RemoveNodeWithoutRelink(graph, front_node) != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + } + } + return SUCCESS; +} + +Status VariableOpPass::RenewTransOpDesc(ge::NodePtr &node, bool is_reverse) { + int tran_in_index = TransOpUtil::GetTransOpDataIndex(node->GetType()); + auto op_desc 
= node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); + GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); + GeTensorDesc renew_desc = is_reverse ? output_desc : input_desc; + bool format_changed = false; + bool shape_changed = false; + bool dtype_changed = false; + if (input_desc.GetFormat() != output_desc.GetFormat()) { + format_changed = true; + } + if (input_desc.GetDataType() != output_desc.GetDataType()) { + dtype_changed = true; + } + if (!ge::formats::IsShapeEqual(input_desc.GetShape(), output_desc.GetShape())) { + shape_changed = true; + } + auto cur_node = node; + while (TransOpUtil::IsTransOp(cur_node)) { + tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); + auto next_node = is_reverse ? NodeUtils::GetInDataNodeByIndex(*cur_node, tran_in_index) : + cur_node->GetOutDataNodes().at(kTransOpOutIndex); + if (!TransOpUtil::IsTransOp(next_node)) { + break; + } + auto prev_desc = next_node->GetOpDesc(); + tran_in_index = TransOpUtil::GetTransOpDataIndex(next_node->GetType()); + auto mutable_output_desc = prev_desc->MutableOutputDesc(kTransOpOutIndex); + auto mutable_input_desc = prev_desc->MutableInputDesc(tran_in_index); + GE_CHECK_NOTNULL(prev_desc->MutableOutputDesc(kTransOpOutIndex)); + GE_CHECK_NOTNULL(prev_desc->MutableInputDesc(tran_in_index)); + if (shape_changed) { + mutable_input_desc->SetShape(renew_desc.GetShape()); + mutable_output_desc->SetShape(renew_desc.GetShape()); + } + if (dtype_changed) { + mutable_input_desc->SetDataType(renew_desc.GetDataType()); + mutable_output_desc->SetDataType(renew_desc.GetDataType()); + } + if (format_changed) { + mutable_input_desc->SetFormat(renew_desc.GetFormat()); + mutable_output_desc->SetFormat(renew_desc.GetFormat()); + } + cur_node = next_node; + } + return SUCCESS; +} + +Status VariableOpPass::CheckIfCouldBeOptimized(const NodePtr &var, vector &first_path_trans_order, + map> &trans_type_to_changed_desc, + 
map> &trans_type_to_trans_ops, bool &flag) { + bool is_match = true; + auto ret = GetSameTransOP(var, first_path_trans_order, trans_type_to_changed_desc, + trans_type_to_trans_ops, is_match); + + if (ret != SUCCESS) { + GELOGE(FAILED, "Get same trans op of variable node: %s failed", var->GetName().c_str()); + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + + if (!is_match) { + flag = false; + GELOGI("trans nodes after variable do not meet the condition"); + return SUCCESS; + } + + flag = true; + return SUCCESS; +} + +Status VariableOpPass::GetSameTransOP(const NodePtr &var, vector &first_path_trans_order, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops, bool &is_match) { + GELOGD("Begin to get Node: %s trans op info of first path", var->GetName().c_str()); + auto ret = GetFisrtPathTransInfo(var, first_path_trans_order, + trans_type_to_changed_desc, trans_type_to_trans_ops); + if (ret != SUCCESS) { + GELOGE(FAILED, "Get var: %s first path trans info failed", var->GetName().c_str()); + return FAILED; + } + + if (first_path_trans_order.empty()) { + GELOGD("var %s first path has no trans op, not need to pass", var->GetName().c_str()); + is_match = false; + return SUCCESS; + } + + GELOGD("Begin to depth first search Node: %s ", var->GetName().c_str()); + VariableDFS(var, trans_type_to_changed_desc, trans_type_to_trans_ops, is_match); + + return SUCCESS; +} + +void VariableOpPass::VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops, bool &is_match) { + std::stack node_stack; + std::stack> path_stack; + for (auto &out_node : node->GetOutDataNodes()) { + if (!is_match) { + break; + } + if (out_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(out_node)) { + is_match = false; + break; + } + node_stack.push(out_node); + path_stack.emplace(vector{out_node}); + while (!node_stack.empty() && is_match) { + auto cur_node = node_stack.top(); + auto cur_path = path_stack.top(); + node_stack.pop(); + 
path_stack.pop(); + if (cur_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(cur_node)) { + UpdateTransInfo(cur_path, is_match, trans_type_to_changed_desc, trans_type_to_trans_ops); + continue; + } + for (auto &next_node : cur_node->GetOutDataNodes()) { + node_stack.push(next_node); + auto next_path = cur_path; + next_path.push_back(next_node); + path_stack.emplace(next_path); + } + } + } +} + +Status VariableOpPass::UpdateTransInfo(vector &cur_path, bool& is_match, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops) { + GELOGD("Begin to update trans info by path"); + std::set trans_op_occured; + for (auto &trans_node : cur_path) { + auto trans_node_type = trans_node->GetType(); + if (trans_op_occured.find(trans_node_type) != trans_op_occured.end() || + !ge::TransOpUtil::IsTransOp(trans_node_type)) { + continue; + } + trans_op_occured.insert(trans_node_type); + auto desc_diff = GetInAndOutDecsDiff(trans_node); + if (trans_type_to_changed_desc.find(trans_node_type) != trans_type_to_changed_desc.end() && + desc_diff == trans_type_to_changed_desc[trans_node_type].first) { + trans_type_to_changed_desc[trans_node_type].second = true; + auto iter = find(trans_type_to_trans_ops[trans_node_type].begin(), + trans_type_to_trans_ops[trans_node_type].end(), + trans_node); + if (iter == trans_type_to_trans_ops[trans_node_type].end()) { + trans_type_to_trans_ops[trans_node_type].push_back(trans_node); + } + } + } + std::set delete_trans_types; + for (auto &trans_item : trans_type_to_changed_desc) { + if (!trans_item.second.second) { + delete_trans_types.insert(trans_item.first); + } else { + trans_item.second.second = false; + } + } + for (auto& delete_item : delete_trans_types) { + trans_type_to_changed_desc.erase(delete_item); + } + if (trans_type_to_changed_desc.empty()) { + is_match = false; + } + return SUCCESS; +} + +Status VariableOpPass::GetFisrtPathTransInfo(const NodePtr &var, vector &first_path_trans_order, + map> 
&trans_type_to_changed_desc, + map> &trans_type_to_trans_ops) { + auto cur_node = var; + while (cur_node->GetOutDataNodesSize() != 0) { + cur_node = cur_node->GetOutDataNodes().at(0); + GE_CHECK_NOTNULL(cur_node); + if (!ge::TransOpUtil::IsTransOp(cur_node)) { + break; + } + auto cur_node_type = cur_node->GetType(); + // only get the the first occurrence operator of same type + if (trans_type_to_changed_desc.find(cur_node_type) == trans_type_to_changed_desc.end()) { + auto desc_diff = GetInAndOutDecsDiff(cur_node); + trans_type_to_changed_desc[cur_node->GetType()] = make_pair(desc_diff, false); + trans_type_to_trans_ops[cur_node->GetType()] = vector{cur_node}; + first_path_trans_order.push_back(cur_node->GetType()); + } + } + GELOGD("get var %s first path trans info success", var->GetName().c_str()); + return SUCCESS; +} + +Status VariableOpPass::GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, + map> &trans_type_to_changed_desc, + vector> &delete_var_ref_trans_nodes) { + auto iterator = var_and_var_ref_map_.find(var_node); + if (iterator == var_and_var_ref_map_.end()) { + GELOGD("there is no var_ref of node %s", var_node->GetName().c_str()); + return SUCCESS; + } + vector delete_trans_type; + for (auto &trans_type : trans_type_to_changed_desc) { + delete_trans_type.push_back(trans_type.first); + } + for (auto &ref_node : iterator->second) { + GE_CHECK_NOTNULL(ref_node); + auto cur_node = *ref_node->GetInDataNodes().begin(); + auto behind_node = ref_node; + GE_CHECK_NOTNULL(cur_node); + vector tmp_delete_trans_type = delete_trans_type; + while (TransOpUtil::IsTransOp(cur_node)) { + GE_CHECK_NOTNULL(cur_node); + auto iter = find(tmp_delete_trans_type.begin(), tmp_delete_trans_type.end(), cur_node->GetType()); + if (iter != tmp_delete_trans_type.end()) { + CheckTransOpOfVarAndVarRefSymmetry(cur_node, trans_type_to_changed_desc[cur_node->GetType()].first, + pass_check); + if (!pass_check) { + GELOGD("trans op : %s of var ref %s is illegal", 
cur_node->GetName().c_str(), ref_node->GetName().c_str()); + return SUCCESS; + } + tmp_delete_trans_type.erase(iter); + delete_var_ref_trans_nodes.emplace_back(std::make_pair(cur_node, behind_node)); + } + int tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); + behind_node = cur_node; + cur_node = cur_node->GetInDataNodes().at(tran_in_index); + } + if (!tmp_delete_trans_type.empty()) { + pass_check = false; + return SUCCESS; + } + } + return SUCCESS; +} + +Status VariableOpPass::CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, + bool &is_symmetry){ + auto var_ref_trans_op_desc_diff = GetInAndOutDecsDiff(var_ref_trans_op, true); + is_symmetry = (var_ref_trans_op_desc_diff == desc_diff); + return SUCCESS; +} + +Status VariableOpPass::UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node) { + if (node == nullptr || node->GetOpDesc() == nullptr) { + GELOGE(FAILED, "node or opdesc is nullptr"); + return FAILED; + } + const Format &format = final_output.GetFormat(); + const DataType &data_type = final_output.GetDataType(); + const GeShape &shape = final_output.GetShape(); + GELOGD("last ref is (%s, %s, %lu), var_ref_name is %s.", TypeUtils::DataTypeToSerialString(data_type).c_str(), + TypeUtils::FormatToSerialString(format).c_str(), shape.GetDims().size(), node->GetName().c_str()); + + auto node_desc = node->GetOpDesc()->GetOutputDesc(0); + CopyVariableFormatDataTypeAndShape(final_output, node_desc); + if (node->GetOpDesc()->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { + GELOGE(FAILED, "update output desc fail."); + return FAILED; + } + GELOGD("node ref is (%s, %s, %lu), var_ref_name is %s.", + TypeUtils::DataTypeToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetDataType()).c_str(), + TypeUtils::FormatToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetFormat()).c_str(), + node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims().size(), node->GetName().c_str()); + + 
auto iterator = var_and_var_ref_map_.find(node); + if (iterator == var_and_var_ref_map_.end()) { + auto graph = node->GetOwnerComputeGraph(); + if (GenerateVariableVariableRefMap(graph) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + } + iterator = var_and_var_ref_map_.find(node); + if (iterator == var_and_var_ref_map_.end()) { + GELOGW("The var node %s which belongs to graph %s can not be found on the graph", node->GetName().c_str(), + node->GetOwnerComputeGraph()->GetName().c_str()); + return SUCCESS; + } + + for (const auto &var_ref_node : iterator->second) { + auto var_ref_node_description = var_ref_node->GetOpDesc(); + GE_CHECK_NOTNULL(var_ref_node_description); + + GELOGD("var_ref_node before is (%s, %s, %zu), var_ref_name is %s.", + TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(format).c_str(), + shape.GetDims().size(), var_ref_node->GetName().c_str()); + if (var_ref_node_description->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { + GELOGW("UpdateOutputDesc fail."); + } + if (var_ref_node_description->UpdateInputDesc(0, node_desc) != GRAPH_SUCCESS) { + GELOGW("UpdateInputDesc fail."); + } + const auto &input_desc = var_ref_node_description->MutableInputDesc(0); + const auto &output_desc = var_ref_node_description->MutableOutputDesc(0); + GE_CHECK_NOTNULL(input_desc); + GE_CHECK_NOTNULL(output_desc); + GELOGD("var_ref_node ref is (%s, %s, %zu), var_ref_name is %s.", + TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), + TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), output_desc->GetShape().GetDims().size(), + var_ref_node->GetName().c_str()); + } + + return SUCCESS; +} + +Status VariableOpPass::GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph) { + std::map names_to_var; + std::map> names_to_refs; + GE_CHECK_NOTNULL(compute_graph); + for (auto 
&node : compute_graph->GetDirectNode()) { + if (node->GetType() != VARIABLE) { + continue; + } + std::string ref_var_name; + if (!ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name)) { + names_to_var[node->GetName()] = node; + } else { + names_to_refs[ref_var_name].insert(node); + } + } + + for (auto &name_to_var : names_to_var) { + var_and_var_ref_map_[name_to_var.second] = names_to_refs[name_to_var.first]; + } + return SUCCESS; +} + +void VariableOpPass::CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, + GeTensorDesc &dst_tensor_desc) { + dst_tensor_desc.SetShape(src_tensor_desc.GetShape()); + dst_tensor_desc.SetFormat(src_tensor_desc.GetFormat()); + dst_tensor_desc.SetDataType(src_tensor_desc.GetDataType()); +} + +Status VariableOpPass::UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes) { + for (auto &need_set_node : nodes) { + auto ret = UpdateVarAndRefOutputFormatInfo(final_output, need_set_node); + if (ret != SUCCESS) { + return GE_GRAPH_VARIABLE_OP_PASS_FAILED; + } + } + return SUCCESS; +} + +Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { + GE_CHECK_NOTNULL(graph); + // renew var manager desc + Status ret = SUCCESS; + for (auto &node : graph->GetDirectNode()) { + bool is_var_node = + (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); + if (is_var_node) { + if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { + GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); + continue; + } + GELOGD("var manager exist var node[%s], graph name[%s]", node->GetName().c_str(), graph->GetName().c_str()); + GE_CHECK_NOTNULL(node->GetOpDesc()); + ret = ge::VarManager::Instance(graph->GetSessionID())->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); + if (ret != SUCCESS) { + GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); + return FAILED; + } + } 
+ } + return SUCCESS; +} + +Status VariableOpPass::RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road) { + // renew var desc if the trans_road is all reshape or reformat + for (auto &road : fusion_road) { + if (road.node_type != RESHAPE && road.node_type != REFORMAT) { + return SUCCESS; + } + } + + if (!ge::VarManager::Instance(session_id)->IsVarExist(node->GetName())) { + GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); + return SUCCESS; + } + GELOGD("var manager exist var node[%s]", node->GetName().c_str()); + GE_CHECK_NOTNULL(node->GetOpDesc()); + Status ret = ge::VarManager::Instance(session_id)->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); + if (ret != SUCCESS) { + GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); + return FAILED; + } + + return SUCCESS; +} + +} // namespace ge diff --git a/ge/graph/passes/variable_op_pass_bak.h b/ge/graph/passes/variable_op_pass_bak.h new file mode 100644 index 00000000..a2b14cf1 --- /dev/null +++ b/ge/graph/passes/variable_op_pass_bak.h @@ -0,0 +1,104 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ +#define GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ +#include +#include +#include +#include "graph/common/transop_util.h" +#include "common/formats/utils/formats_trans_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/graph.h" +#include "graph/manager/graph_var_manager.h" +#include "graph/manager/util/variable_accelerate_ctrl.h" +#include "inc/graph_pass.h" + +namespace ge { +namespace variable_op { +struct NodeDesc { + ge::GeTensorDesc input; + ge::GeTensorDesc output; + bool is_update = false; +}; +} // namespace variable_op +class VariableOpPass : public GraphPass { + public: + explicit VariableOpPass(VarAccelerateCtrl *ctrl) : var_accelerate_ctrl_(ctrl) {} + + ~VariableOpPass() override = default; + + Status Run(ge::ComputeGraphPtr graph) override; + + private: + Status UpdateTransRoad(VarTransRoad &fusion_road, vector &trans_road_order, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops); + + Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, + map> trans_type_to_changed_desc, + map> trans_type_to_trans_ops, + vector> &delete_trans_nodes); + + Status RenewTransOpDesc(ge::NodePtr &node, bool is_reverse); + + Status RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road); + + Status CheckIfCouldBeOptimized(const NodePtr &var, vector &trans_road_order, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops, bool &flag); + + Status FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road); + + Status GetSameTransOP(const NodePtr &var, vector &trans_road_order, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops, bool &is_match); + + Status GetFisrtPathTransInfo(const NodePtr &var, vector &trans_road_order, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops); + + void VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops, bool &is_match); + + Status 
UpdateTransInfo(vector &cur_path, bool& is_match, + map> &trans_type_to_changed_desc, + map> &trans_type_to_trans_ops); + + Status GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, + map> &trans_type_to_changed_desc, + vector> &delete_var_ref_trans_nodes); + + Status CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, bool &is_symmetry); + + Status UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node); + + Status GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph); + + void CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, GeTensorDesc &dst_tensor_desc); + + Status UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes); + + Status RenewVarDesc(ge::ComputeGraphPtr &graph); + + Status RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road); + + map> var_and_var_ref_map_; + + VarAccelerateCtrl *var_accelerate_ctrl_; +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ diff --git a/ge/graph/passes/variable_prepare_op_pass.cc b/ge/graph/passes/variable_prepare_op_pass.cc index 9231e4eb..2693d5c4 100644 --- a/ge/graph/passes/variable_prepare_op_pass.cc +++ b/ge/graph/passes/variable_prepare_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_prepare_op_pass.h b/ge/graph/passes/variable_prepare_op_pass.h index 4cef5b59..563a9be5 100644 --- a/ge/graph/passes/variable_prepare_op_pass.h +++ b/ge/graph/passes/variable_prepare_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_ref_delete_op_pass.cc b/ge/graph/passes/variable_ref_delete_op_pass.cc index 8e625857..90cfd747 100644 --- a/ge/graph/passes/variable_ref_delete_op_pass.cc +++ b/ge/graph/passes/variable_ref_delete_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_ref_delete_op_pass.h b/ge/graph/passes/variable_ref_delete_op_pass.h index 7f6d1274..c6f1be43 100755 --- a/ge/graph/passes/variable_ref_delete_op_pass.h +++ b/ge/graph/passes/variable_ref_delete_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc index 1c8eb0ec..4c996a4c 100644 --- a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc +++ b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "variable_ref_useless_control_out_delete_pass.h" namespace ge { diff --git a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h index fd9dbb00..24648553 100644 --- a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h +++ b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ #define GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index f90c0d80..2a0d0cfc 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index ef0f3ed3..93aef853 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/preprocess/insert_op/base_insert_op.h b/ge/graph/preprocess/insert_op/base_insert_op.h index b0d7a7a6..15e38639 100644 --- a/ge/graph/preprocess/insert_op/base_insert_op.h +++ b/ge/graph/preprocess/insert_op/base_insert_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 960a19b8..74619420 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -788,22 +788,24 @@ Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { } Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { - static int index = 0; std::vector input_shape_dim(1, max_dynamic_aipp_size); GeShape input_shape(input_shape_dim); // construct input tensor GeTensorDesc input_tensor(input_shape, FORMAT_ND, DT_UINT8); TensorUtils::SetReuseInput(input_tensor, false); TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); - + GE_CHECK_NOTNULL(aipp_node); const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); string node_name; - if (index == 0) { + // First aippdata name should be definite. 
+ if (graph->FindFirstNodeMatchType(AIPPDATA) == nullptr) { + GELOGI("Current graph has no aippdata node, so the name of it must be definite."); node_name = kDynamicAippData; } else { - node_name = string(kDynamicAippData) + "_" + to_string(index); + node_name = string(kDynamicAippData) + "_" + aipp_node->GetName(); } - ++index; + GELOGI("Current add aippdata node name is %s", node_name.c_str()); + // new add aipp_data ops for dynamic aipp param input OpDescPtr op_desc_ptr_data = MakeShared(node_name, AIPPDATA); GE_CHECK_NOTNULL(op_desc_ptr_data); diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.h b/ge/graph/preprocess/insert_op/ge_aipp_op.h index 22ae0cea..92f04d33 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.h +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 8274ce8c..2da3657a 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/ge/graph/preprocess/insert_op/util_insert_aipp_op.h index e785da98..ae431c32 100644 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.h +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index e05d1810..d97d0859 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/preprocess/multi_batch_copy_graph.h" #include @@ -43,7 +44,6 @@ using std::set; using std::string; using std::vector; -using std::map; namespace ge { namespace multibatch { @@ -264,24 +264,27 @@ Status MultiBatchGraphCopyer::Init() { } Status MultiBatchGraphCopyer::LabelStatus() { - map> frame_enters; - InitStatus(frame_enters); - + for (const auto &data : origin_data_nodes_) { + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + if (!IsAllDimsPositive(data_shape.GetDims())) { + origin_nodes_status_[data.get()] = kNodeInBatchBranch; + } + } bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { changed = false; for (const auto &node : origin_all_nodes_) { + auto iter = origin_nodes_status_.find(node.get()); + if (iter != origin_nodes_status_.end()) { + continue; + } for (auto &in_node : node->GetInAllNodes()) { bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; if (is_in_batch) { - if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || - origin_nodes_status_[node.get()] != kNodeInBatchBranch) { - origin_nodes_status_[node.get()] = kNodeInBatchBranch; - ResetEnterStatus(frame_enters, node); - changed = true; - } + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + changed = true; break; } } @@ -312,45 +315,6 @@ Status MultiBatchGraphCopyer::LabelStatus() { return SUCCESS; } -void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { - for (const auto &node : origin_all_nodes_) { - if (node->GetType() != ENTER && node->GetType() != REFENTER) { - continue; - } - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - string frame_name; - if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { - frame_enters[frame_name].emplace_back(node); - } - } - - for (const auto &data : origin_data_nodes_) { - 
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - if (!IsAllDimsPositive(data_shape.GetDims())) { - origin_nodes_status_[data.get()] = kNodeInBatchBranch; - } - } -} - -void MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, const NodePtr &node) { - if (node->GetType() != ENTER && node->GetType() != REFENTER) { - return; - } - - for (const auto &frame_enter : frame_enters) { - auto &enters = frame_enter.second; - if (std::find(enters.begin(), enters.end(), node) != enters.end()) { - for (const auto &enter : enters) { - origin_nodes_status_[enter.get()] = kNodeInBatchBranch; - } - break; - } - } -} - Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){ size_t unknown_shape_count = 0; auto data_name_and_shape = GetLocalOmgContext().user_input_dims; @@ -1190,7 +1154,7 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, } } -// Connect NetOutput directly: DTS2020070612498 +// Connect NetOutput directly void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set &dynamic_output_index, vector &dynamic_output_dims) { GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); diff --git a/ge/graph/preprocess/multi_batch_copy_graph.h b/ge/graph/preprocess/multi_batch_copy_graph.h index edd79ada..003de5ac 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/ge/graph/preprocess/multi_batch_copy_graph.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ #define GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ #include @@ -69,8 +70,6 @@ class MultiBatchGraphCopyer { // label status for origin_all_nodes_ Status LabelStatus(); - void InitStatus(std::map> &frame_enters); - void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); // add nodes functions Status CreateNewNodes(); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 9909b0dc..934d7943 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/preprocess/multi_batch_options.h b/ge/graph/preprocess/multi_batch_options.h index 8563f2f1..97424955 100644 --- a/ge/graph/preprocess/multi_batch_options.h +++ b/ge/graph/preprocess/multi_batch_options.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/common/constant/constant.h b/ge/host_cpu_engine/common/constant/constant.h index b9603b6a..a3cabdc4 100644 --- a/ge/host_cpu_engine/common/constant/constant.h +++ b/ge/host_cpu_engine/common/constant/constant.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.cc b/ge/host_cpu_engine/engine/host_cpu_engine.cc index cdbad1ed..648e13b1 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.cc +++ b/ge/host_cpu_engine/engine/host_cpu_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.h b/ge/host_cpu_engine/engine/host_cpu_engine.h index c8d5608f..ecafd98b 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.h +++ b/ge/host_cpu_engine/engine/host_cpu_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/module.mk b/ge/host_cpu_engine/module.mk index 2212b5ee..fdd6dca6 100644 --- a/ge/host_cpu_engine/module.mk +++ b/ge/host_cpu_engine/module.mk @@ -40,7 +40,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libhost_cpu_engine LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := @@ -96,6 +96,26 @@ LOCAL_C_INCLUDES := $(local_lib_inc_path) include ${BUILD_HOST_STATIC_LIBRARY} +#compiler for device static lib +include $(CLEAR_VARS) +LOCAL_MODULE := libhost_cpu_opskernel_builder +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := libprotobuf \ + libgraph \ + libregister \ + +LOCAL_SHARED_LIBRARIES := libc_sec \ + libslog \ + +LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc + +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_STATIC_LIBRARY} + #compiler for atc ops 
kernel builder include $(CLEAR_VARS) LOCAL_MODULE := atclib/libhost_cpu_opskernel_builder diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc index adb252bc..0f522742 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h index 82375b9f..c1c78a19 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc index 2d7798a4..7598a30a 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h index d29e0c65..1202cc8a 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc index a6e00f4a..472fca45 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h index 0f560485..757b96a6 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op.h b/ge/host_cpu_engine/ops_kernel_store/op/op.h index c094f080..c1e1619c 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc index 176ae579..efe44f80 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h index 3a235ffd..92f627fd 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/add_kernel.cc b/ge/host_kernels/add_kernel.cc index 1c206018..8fc201a8 100644 --- a/ge/host_kernels/add_kernel.cc +++ b/ge/host_kernels/add_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/add_kernel.h b/ge/host_kernels/add_kernel.h index 70800b66..f8fd272e 100755 --- a/ge/host_kernels/add_kernel.h +++ b/ge/host_kernels/add_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/broadcast_args_kernel.cc b/ge/host_kernels/broadcast_args_kernel.cc index d8880db9..94a92a7d 100644 --- a/ge/host_kernels/broadcast_args_kernel.cc +++ b/ge/host_kernels/broadcast_args_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/broadcast_args_kernel.h b/ge/host_kernels/broadcast_args_kernel.h index eb9a46f4..6d57976c 100755 --- a/ge/host_kernels/broadcast_args_kernel.h +++ b/ge/host_kernels/broadcast_args_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/broadcast_gradient_args_kernel.cc b/ge/host_kernels/broadcast_gradient_args_kernel.cc index 51ff4a4c..ed790dab 100644 --- a/ge/host_kernels/broadcast_gradient_args_kernel.cc +++ b/ge/host_kernels/broadcast_gradient_args_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "host_kernels/broadcast_gradient_args_kernel.h" #include diff --git a/ge/host_kernels/broadcast_gradient_args_kernel.h b/ge/host_kernels/broadcast_gradient_args_kernel.h index 84764228..8f183653 100755 --- a/ge/host_kernels/broadcast_gradient_args_kernel.h +++ b/ge/host_kernels/broadcast_gradient_args_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/cast_kernel.cc b/ge/host_kernels/cast_kernel.cc index 056081a1..0a0a4c6f 100644 --- a/ge/host_kernels/cast_kernel.cc +++ b/ge/host_kernels/cast_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/cast_kernel.h b/ge/host_kernels/cast_kernel.h index 12735cd4..5212bad0 100755 --- a/ge/host_kernels/cast_kernel.h +++ b/ge/host_kernels/cast_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/concat_offset_kernel.cc b/ge/host_kernels/concat_offset_kernel.cc index ff597873..6d5c8f88 100644 --- a/ge/host_kernels/concat_offset_kernel.cc +++ b/ge/host_kernels/concat_offset_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/concat_offset_kernel.h b/ge/host_kernels/concat_offset_kernel.h index d2f9422b..b1e0958a 100755 --- a/ge/host_kernels/concat_offset_kernel.h +++ b/ge/host_kernels/concat_offset_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/concat_v2_kernel.cc b/ge/host_kernels/concat_v2_kernel.cc index a9f0da81..2ab9d23c 100644 --- a/ge/host_kernels/concat_v2_kernel.cc +++ b/ge/host_kernels/concat_v2_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/concat_v2_kernel.h b/ge/host_kernels/concat_v2_kernel.h index 90f1899b..353b7ed5 100755 --- a/ge/host_kernels/concat_v2_kernel.h +++ b/ge/host_kernels/concat_v2_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc index d26237f4..1f2b7aa9 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.cc +++ b/ge/host_kernels/dynamic_stitch_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/dynamic_stitch_kernel.h b/ge/host_kernels/dynamic_stitch_kernel.h index 2cca94e3..512c731b 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.h +++ b/ge/host_kernels/dynamic_stitch_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/empty_kernel.cc b/ge/host_kernels/empty_kernel.cc index 19e938ce..8f5b1eb1 100644 --- a/ge/host_kernels/empty_kernel.cc +++ b/ge/host_kernels/empty_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/empty_kernel.h b/ge/host_kernels/empty_kernel.h index 7fd2791c..bc426048 100755 --- a/ge/host_kernels/empty_kernel.h +++ b/ge/host_kernels/empty_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/expanddims_kernel.cc b/ge/host_kernels/expanddims_kernel.cc index f304fbdb..5978955d 100644 --- a/ge/host_kernels/expanddims_kernel.cc +++ b/ge/host_kernels/expanddims_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/expanddims_kernel.h b/ge/host_kernels/expanddims_kernel.h index 77971a29..4970d89c 100755 --- a/ge/host_kernels/expanddims_kernel.h +++ b/ge/host_kernels/expanddims_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/fill_kernel.cc b/ge/host_kernels/fill_kernel.cc index 4e3d4db5..86aec04b 100644 --- a/ge/host_kernels/fill_kernel.cc +++ b/ge/host_kernels/fill_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/fill_kernel.h b/ge/host_kernels/fill_kernel.h index 1a4546f2..a1b6b4ef 100755 --- a/ge/host_kernels/fill_kernel.h +++ b/ge/host_kernels/fill_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/floordiv_kernel.cc b/ge/host_kernels/floordiv_kernel.cc index 0574ca3b..5114122c 100644 --- a/ge/host_kernels/floordiv_kernel.cc +++ b/ge/host_kernels/floordiv_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floordiv_kernel.h b/ge/host_kernels/floordiv_kernel.h index d3dc3ff7..c8505731 100755 --- a/ge/host_kernels/floordiv_kernel.h +++ b/ge/host_kernels/floordiv_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floormod_kernel.cc b/ge/host_kernels/floormod_kernel.cc index 31e4e19b..7ad746de 100644 --- a/ge/host_kernels/floormod_kernel.cc +++ b/ge/host_kernels/floormod_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floormod_kernel.h b/ge/host_kernels/floormod_kernel.h index 439fc0a6..faa5c8e2 100755 --- a/ge/host_kernels/floormod_kernel.h +++ b/ge/host_kernels/floormod_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/gather_v2_kernel.cc b/ge/host_kernels/gather_v2_kernel.cc index e52b4534..7413395a 100644 --- a/ge/host_kernels/gather_v2_kernel.cc +++ b/ge/host_kernels/gather_v2_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/gather_v2_kernel.h b/ge/host_kernels/gather_v2_kernel.h index 17fcba59..0bf4e3ee 100755 --- a/ge/host_kernels/gather_v2_kernel.h +++ b/ge/host_kernels/gather_v2_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/greater_kernel.cc b/ge/host_kernels/greater_kernel.cc index a245ec8d..f23eee2f 100644 --- a/ge/host_kernels/greater_kernel.cc +++ b/ge/host_kernels/greater_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/greater_kernel.h b/ge/host_kernels/greater_kernel.h index 6f136462..84b5bc87 100755 --- a/ge/host_kernels/greater_kernel.h +++ b/ge/host_kernels/greater_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/identity_kernel.cc b/ge/host_kernels/identity_kernel.cc index 702f5c93..46063ba7 100644 --- a/ge/host_kernels/identity_kernel.cc +++ b/ge/host_kernels/identity_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #include "identity_kernel.h" #include "inc/kernel_factory.h" diff --git a/ge/host_kernels/identity_kernel.h b/ge/host_kernels/identity_kernel.h index 84cd08bb..2164d880 100644 --- a/ge/host_kernels/identity_kernel.h +++ b/ge/host_kernels/identity_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_IDENTITY_KERNEL_H_ #define GE_GRAPH_PASSES_FOLDING_KERNEL_IDENTITY_KERNEL_H_ diff --git a/ge/host_kernels/kernel_utils.cc b/ge/host_kernels/kernel_utils.cc index 595f9517..e5b0a017 100755 --- a/ge/host_kernels/kernel_utils.cc +++ b/ge/host_kernels/kernel_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/kernel_utils.h b/ge/host_kernels/kernel_utils.h index c9c90634..7214fe52 100755 --- a/ge/host_kernels/kernel_utils.h +++ b/ge/host_kernels/kernel_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/maximum_kernel.cc b/ge/host_kernels/maximum_kernel.cc index 2ced113f..aca4ec2b 100644 --- a/ge/host_kernels/maximum_kernel.cc +++ b/ge/host_kernels/maximum_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/maximum_kernel.h b/ge/host_kernels/maximum_kernel.h index d7e69f59..feaa91e7 100755 --- a/ge/host_kernels/maximum_kernel.h +++ b/ge/host_kernels/maximum_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/mul_kernel.cc b/ge/host_kernels/mul_kernel.cc index b01a5c79..8dbe83a5 100644 --- a/ge/host_kernels/mul_kernel.cc +++ b/ge/host_kernels/mul_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/mul_kernel.h b/ge/host_kernels/mul_kernel.h index 2d06f676..e7c74c41 100755 --- a/ge/host_kernels/mul_kernel.h +++ b/ge/host_kernels/mul_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/pack_kernel.cc b/ge/host_kernels/pack_kernel.cc index 476005ef..e8094709 100644 --- a/ge/host_kernels/pack_kernel.cc +++ b/ge/host_kernels/pack_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "host_kernels/pack_kernel.h" #include diff --git a/ge/host_kernels/pack_kernel.h b/ge/host_kernels/pack_kernel.h index 87b77a66..b32e3fae 100755 --- a/ge/host_kernels/pack_kernel.h +++ b/ge/host_kernels/pack_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_PACK_KERNEL_H_ #define GE_GRAPH_PASSES_FOLDING_KERNEL_PACK_KERNEL_H_ diff --git a/ge/host_kernels/permute_kernel.cc b/ge/host_kernels/permute_kernel.cc index 327c94f8..d3f07577 100755 --- a/ge/host_kernels/permute_kernel.cc +++ b/ge/host_kernels/permute_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/permute_kernel.h b/ge/host_kernels/permute_kernel.h index 589ea49e..b022abd7 100755 --- a/ge/host_kernels/permute_kernel.h +++ b/ge/host_kernels/permute_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/range_kernel.cc b/ge/host_kernels/range_kernel.cc index 32a72b47..4ce3725d 100644 --- a/ge/host_kernels/range_kernel.cc +++ b/ge/host_kernels/range_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/range_kernel.h b/ge/host_kernels/range_kernel.h index e58530d0..50b1c232 100755 --- a/ge/host_kernels/range_kernel.h +++ b/ge/host_kernels/range_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/rank_kernel.cc b/ge/host_kernels/rank_kernel.cc index 1de9478c..1d93418c 100755 --- a/ge/host_kernels/rank_kernel.cc +++ b/ge/host_kernels/rank_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/rank_kernel.h b/ge/host_kernels/rank_kernel.h index 80c0bb7d..0de4960c 100755 --- a/ge/host_kernels/rank_kernel.h +++ b/ge/host_kernels/rank_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reduce_prod_kernel.cc b/ge/host_kernels/reduce_prod_kernel.cc index 4837a921..ec95f28f 100644 --- a/ge/host_kernels/reduce_prod_kernel.cc +++ b/ge/host_kernels/reduce_prod_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reduce_prod_kernel.h b/ge/host_kernels/reduce_prod_kernel.h index ccf33668..326dd2f5 100755 --- a/ge/host_kernels/reduce_prod_kernel.h +++ b/ge/host_kernels/reduce_prod_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/reformat_kernel.cc b/ge/host_kernels/reformat_kernel.cc index c1942983..46269c09 100644 --- a/ge/host_kernels/reformat_kernel.cc +++ b/ge/host_kernels/reformat_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reformat_kernel.h b/ge/host_kernels/reformat_kernel.h index 770b90b3..e3d49acf 100755 --- a/ge/host_kernels/reformat_kernel.h +++ b/ge/host_kernels/reformat_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reshape_kernel.cc b/ge/host_kernels/reshape_kernel.cc index 7c4f58f6..dc7e4bb8 100644 --- a/ge/host_kernels/reshape_kernel.cc +++ b/ge/host_kernels/reshape_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reshape_kernel.h b/ge/host_kernels/reshape_kernel.h index 37b12db9..c0100e51 100755 --- a/ge/host_kernels/reshape_kernel.h +++ b/ge/host_kernels/reshape_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/rsqrt_kernel.cc b/ge/host_kernels/rsqrt_kernel.cc index 74c78787..93a96e14 100755 --- a/ge/host_kernels/rsqrt_kernel.cc +++ b/ge/host_kernels/rsqrt_kernel.cc @@ -1,5 +1,5 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "host_kernels/rsqrt_kernel.h" #include diff --git a/ge/host_kernels/rsqrt_kernel.h b/ge/host_kernels/rsqrt_kernel.h index e3733521..51ab628d 100755 --- a/ge/host_kernels/rsqrt_kernel.h +++ b/ge/host_kernels/rsqrt_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/shape_kernel.cc b/ge/host_kernels/shape_kernel.cc index ecb0e082..d4069fb0 100644 --- a/ge/host_kernels/shape_kernel.cc +++ b/ge/host_kernels/shape_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/shape_kernel.h b/ge/host_kernels/shape_kernel.h index 6ef416bf..8e8791e5 100755 --- a/ge/host_kernels/shape_kernel.h +++ b/ge/host_kernels/shape_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/shape_n_kernel.cc b/ge/host_kernels/shape_n_kernel.cc index 67d2eeff..ee02cccf 100644 --- a/ge/host_kernels/shape_n_kernel.cc +++ b/ge/host_kernels/shape_n_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/shape_n_kernel.h b/ge/host_kernels/shape_n_kernel.h index 51fd9393..55829a39 100755 --- a/ge/host_kernels/shape_n_kernel.h +++ b/ge/host_kernels/shape_n_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/size_kernel.cc b/ge/host_kernels/size_kernel.cc index caa5febc..65bb21fc 100644 --- a/ge/host_kernels/size_kernel.cc +++ b/ge/host_kernels/size_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/size_kernel.h b/ge/host_kernels/size_kernel.h index 43a00f2f..3a309bc7 100755 --- a/ge/host_kernels/size_kernel.h +++ b/ge/host_kernels/size_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/slice_d_kernel.cc b/ge/host_kernels/slice_d_kernel.cc index b8572290..3fb9eb93 100644 --- a/ge/host_kernels/slice_d_kernel.cc +++ b/ge/host_kernels/slice_d_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/slice_d_kernel.h b/ge/host_kernels/slice_d_kernel.h index 751b6076..90ef9b8b 100755 --- a/ge/host_kernels/slice_d_kernel.h +++ b/ge/host_kernels/slice_d_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index fc98e8a5..5f72fc49 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/slice_kernel.h b/ge/host_kernels/slice_kernel.h index 4c059b18..1a374096 100755 --- a/ge/host_kernels/slice_kernel.h +++ b/ge/host_kernels/slice_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/squeeze_kernel.cc b/ge/host_kernels/squeeze_kernel.cc index 4a2c6725..4f730e94 100644 --- a/ge/host_kernels/squeeze_kernel.cc +++ b/ge/host_kernels/squeeze_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/squeeze_kernel.h b/ge/host_kernels/squeeze_kernel.h index 6d4c15da..89fdf99b 100755 --- a/ge/host_kernels/squeeze_kernel.h +++ b/ge/host_kernels/squeeze_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/ssd_prior_box_kernel.cc b/ge/host_kernels/ssd_prior_box_kernel.cc index b3a0fc3e..58db7654 100644 --- a/ge/host_kernels/ssd_prior_box_kernel.cc +++ b/ge/host_kernels/ssd_prior_box_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/ssd_prior_box_kernel.h b/ge/host_kernels/ssd_prior_box_kernel.h index 0ebf221d..96de2b85 100755 --- a/ge/host_kernels/ssd_prior_box_kernel.h +++ b/ge/host_kernels/ssd_prior_box_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index e8fb658a..7024c6b1 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -1,28 +1,22 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #include "host_kernels/strided_slice_kernel.h" - -#include #include "common/fp16_t.h" -#include "common/ge_inner_error_codes.h" #include "common/math/math_util.h" -#include "common/op/ge_op_utils.h" -#include "external/graph/types.h" -#include "framework/common/debug/ge_log.h" #include "framework/common/types.h" #include "graph/utils/type_utils.h" #include "host_kernels/kernel_utils.h" @@ -37,16 +31,16 @@ const size_t kStridedSliceBeginIndex = 1; const size_t kStridedSliceEndIndex = 2; const size_t kStridedSliceStrideIndex = 3; const int32_t kDefaultStrideSize = 1; +const uint32_t kMaskBitLeftUnit = 1; const std::set kIndexNumberType = {DT_INT32, DT_INT64}; -bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const int ellipsis_mask) { +bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const uint32_t ellipsis_mask) { if (ellipsis_mask != 0) { auto ellipsis_num = 0; auto input_shape = input_desc->GetShape(); - bool ellipsis_mask_flag = false; - for (size_t i = 0; i < input_shape.GetDimNum(); i++) { - uint32_t i_temp = static_cast(i); - ellipsis_mask_flag = 
(static_cast(ellipsis_mask) & (1 << i_temp)); + for (size_t i = 0; i < input_shape.GetDimNum(); ++i) { + auto i_temp = static_cast(i); + bool ellipsis_mask_flag = (ellipsis_mask) & (kMaskBitLeftUnit << i_temp); if (ellipsis_mask_flag) { ++ellipsis_num; } @@ -58,6 +52,35 @@ bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const int ellipsis_m } return true; } + +void GetOriginStrideVec(const std::vector &input, vector &orig_begin_vec, + vector &orig_end_vec, vector &orig_stride_vec) { + ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; + ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; + ConstGeTensorPtr stride_tensor = input[kStridedSliceStrideIndex]; + + auto data_type = begin_tensor->GetTensorDesc().GetDataType(); + size_t vec_size = begin_tensor->GetData().size() / GetSizeByDataType(data_type); + if (data_type == DT_INT32) { + const int32_t *begin = reinterpret_cast(begin_tensor->GetData().data()); + const int32_t *end = reinterpret_cast(end_tensor->GetData().data()); + const int32_t *stride = reinterpret_cast(stride_tensor->GetData().data()); + for (size_t i = 0; i < vec_size; ++i) { + orig_begin_vec.emplace_back(begin[i]); + orig_end_vec.emplace_back(end[i]); + orig_stride_vec.emplace_back(stride[i]); + } + } else { + const int64_t *begin = reinterpret_cast(begin_tensor->GetData().data()); + const int64_t *end = reinterpret_cast(end_tensor->GetData().data()); + const int64_t *stride = reinterpret_cast(stride_tensor->GetData().data()); + for (size_t i = 0; i < vec_size; ++i) { + orig_begin_vec.emplace_back(begin[i]); + orig_end_vec.emplace_back(end[i]); + orig_stride_vec.emplace_back(stride[i]); + } + } +} } // namespace Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector &input, vector &v_output) { @@ -134,7 +157,7 @@ Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { } return SUCCESS; } -Status StridedSliceKernel::CheckInputParam(const std::vector &input) const { +Status 
StridedSliceKernel::CheckInputParam(const std::vector &input) { if (input.size() != kStridedSliceInputSize) { GELOGE(PARAM_INVALID, "The number of input for strided slice must be %zu.", kStridedSliceInputSize); return PARAM_INVALID; @@ -171,9 +194,9 @@ Status StridedSliceKernel::CheckInputParam(const std::vector & return PARAM_INVALID; } size_t weight0_size = weight0->GetData().size() / x_data_size; - size_t begin_data_size = begin_tensor->GetData().size() / sizeof(int32_t); - size_t end_data_size = end_tensor->GetData().size() / sizeof(int32_t); - size_t stride_data_size = stride_tensor->GetData().size() / sizeof(int32_t); + size_t begin_data_size = begin_tensor->GetData().size(); + size_t end_data_size = end_tensor->GetData().size(); + size_t stride_data_size = stride_tensor->GetData().size(); if ((weight0_size == 0) || (begin_data_size == 0) || (end_data_size == 0) || (stride_data_size == 0)) { GELOGW("Data size of inputs is 0."); return PARAM_INVALID; @@ -183,7 +206,6 @@ Status StridedSliceKernel::CheckInputParam(const std::vector & GELOGW("The sizes of begin, end and stride is not supported."); return PARAM_INVALID; } - return SUCCESS; } @@ -192,8 +214,6 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &output_dims, std::vector &stride_vec) { ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; - ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; - ConstGeTensorPtr stride_tensor = input[kStridedSliceStrideIndex]; const GeShape x_shape = weight0->GetTensorDesc().GetShape(); auto x_dims = x_shape.GetDims(); @@ -201,15 +221,13 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector(begin_tensor->GetData().data()); - const int32_t *end = reinterpret_cast(end_tensor->GetData().data()); - const int32_t *stride = reinterpret_cast(stride_tensor->GetData().data()); - auto begin_dim_num = begin_tensor->GetData().size() / sizeof(int32_t); + vector orig_begin_vec, 
orig_end_vec, orig_stride_vec; + GetOriginStrideVec(input, orig_begin_vec, orig_end_vec, orig_stride_vec); + auto begin_dim_num = orig_begin_vec.size(); auto min_dim = x_dims_num > begin_dim_num ? begin_dim_num : x_dims_num; for (size_t i = 0; i < x_dims.size(); ++i) { - auto i_temp = static_cast(i); - bool new_axis_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); + auto i_temp = static_cast(i); + bool new_axis_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK) & (kMaskBitLeftUnit << i_temp)); if (new_axis_mask_flag) { output_dims.push_back(1); input_dims.push_back(1); @@ -222,9 +240,9 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &x_dims) { auto begin_data_type_size = GetSizeByDataType(begin_tensor->GetTensorDesc().GetDataType()); size_t begin_vec_size = begin_tensor->GetData().size() / begin_data_type_size; auto final_dim_num = x_dims_num < begin_vec_size ? begin_vec_size : x_dims_num; for (size_t i = 0; i < final_dim_num; i++) { - auto i_temp = static_cast(i); - bool new_axis_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); + auto i_temp = static_cast(i); + bool new_axis_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK) & (kMaskBitLeftUnit << i_temp)); if (new_axis_mask_flag) { x_dims.insert(x_dims.begin() + i, 1); } } } + Status StridedSliceKernel::MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const { - uint64_t i_temp = static_cast(i); - bool begin_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_BEGIN_MASK)) & (1 << i_temp)); - bool end_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK)) & (1 << i_temp)); - bool ellipsis_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK)) & (1 << i_temp)); - bool shrink_mask_flag = - (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << i_temp)); + auto i_temp = 
static_cast(i); + bool begin_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_BEGIN_MASK) & (kMaskBitLeftUnit << i_temp)); + bool end_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK) & (kMaskBitLeftUnit << i_temp)); + bool ellipsis_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK) & (kMaskBitLeftUnit << i_temp)); + bool shrink_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK) & (kMaskBitLeftUnit << i_temp)); if (shrink_mask_flag) { begin_i = (begin_i < 0 ? (dim_i + begin_i) : begin_i); FMK_INT32_ADDCHECK(begin_i, kNumOne) @@ -292,8 +309,9 @@ Status StridedSliceKernel::MaskCal(const size_t i, int64_t &begin_i, int64_t &en } return SUCCESS; } + Status StridedSliceKernel::StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, - int64_t &dim_final) const { + int64_t &dim_final) { if (stride_i == 0) { stride_i = kDefaultStrideSize; } else if (stride_i < 0) { @@ -313,15 +331,17 @@ Status StridedSliceKernel::StrideCal(const int64_t x_dims_i, int64_t &begin_i, i } return SUCCESS; } + void StridedSliceKernel::GetOutputDims(uint32_t dims_size, const std::vector &output_dims, vector &v_dims) { for (uint32_t k = 0; k < dims_size; k++) { - bool shrink_mask_i = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << k)); + bool shrink_mask_i = (attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK) & (kMaskBitLeftUnit << k)); if (shrink_mask_i) { continue; } v_dims.push_back(output_dims[k]); } } + REGISTER_KERNEL(STRIDEDSLICE, StridedSliceKernel); } // namespace ge diff --git a/ge/host_kernels/strided_slice_kernel.h b/ge/host_kernels/strided_slice_kernel.h index b8d11477..7ac6dbf3 100755 --- a/ge/host_kernels/strided_slice_kernel.h +++ b/ge/host_kernels/strided_slice_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use 
this file except in compliance with the License. @@ -28,13 +28,13 @@ class StridedSliceKernel : public Kernel { private: Status CheckAndGetAttr(const OpDescPtr &attr); - Status CheckInputParam(const std::vector &input) const; + static Status CheckInputParam(const std::vector &input) ; Status InitParamWithAttrs(const std::vector &input, std::vector &input_dims, std::vector &begin_vec, std::vector &output_dims, std::vector &stride_vec); Status MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const; - Status StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, - int64_t &dim_final) const; + static Status StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, + int64_t &dim_final) ; void ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_tensor, const size_t x_dims_num, vector &x_dims); void GetOutputDims(uint32_t dims_size, const std::vector &output_dims, vector &v_dims); diff --git a/ge/host_kernels/sub_kernel.cc b/ge/host_kernels/sub_kernel.cc index deb36cb3..70a14c9f 100644 --- a/ge/host_kernels/sub_kernel.cc +++ b/ge/host_kernels/sub_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/sub_kernel.h b/ge/host_kernels/sub_kernel.h index 32ab7084..44744229 100755 --- a/ge/host_kernels/sub_kernel.h +++ b/ge/host_kernels/sub_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/transdata_kernel.cc b/ge/host_kernels/transdata_kernel.cc index 2b16b075..aeb9e338 100644 --- a/ge/host_kernels/transdata_kernel.cc +++ b/ge/host_kernels/transdata_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/transdata_kernel.h b/ge/host_kernels/transdata_kernel.h index 1d212cf5..e4cf9b39 100755 --- a/ge/host_kernels/transdata_kernel.h +++ b/ge/host_kernels/transdata_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/transpose_kernel.cc b/ge/host_kernels/transpose_kernel.cc index 03d112aa..5cbd9032 100755 --- a/ge/host_kernels/transpose_kernel.cc +++ b/ge/host_kernels/transpose_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/transpose_kernel.h b/ge/host_kernels/transpose_kernel.h index 9e7c54d7..bb073c15 100755 --- a/ge/host_kernels/transpose_kernel.h +++ b/ge/host_kernels/transpose_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/unpack_kernel.cc b/ge/host_kernels/unpack_kernel.cc index 1c28151f..9e8ccb65 100755 --- a/ge/host_kernels/unpack_kernel.cc +++ b/ge/host_kernels/unpack_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/unpack_kernel.h b/ge/host_kernels/unpack_kernel.h index f20c0d1d..8cfe9bd4 100755 --- a/ge/host_kernels/unpack_kernel.h +++ b/ge/host_kernels/unpack_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/unsqueeze_kernel.cc b/ge/host_kernels/unsqueeze_kernel.cc index 4ceaba3f..d66a3e2c 100644 --- a/ge/host_kernels/unsqueeze_kernel.cc +++ b/ge/host_kernels/unsqueeze_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/unsqueeze_kernel.h b/ge/host_kernels/unsqueeze_kernel.h index 510a1ffa..c676586f 100644 --- a/ge/host_kernels/unsqueeze_kernel.h +++ b/ge/host_kernels/unsqueeze_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index 6e93b7e4..d4652a91 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -18,6 +18,7 @@ #include "graph/load/new_model_manager/model_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" +#include "graph/ge_context.h" #include "omm/csa_interact.h" namespace ge { @@ -58,6 +59,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr &lis run_flag_ = true; listener_ = listener; future_ = std::async([&]() -> Status { + GetContext().SetSessionId(executor_->GetContext()->session_id); return RunInternal(); }); diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index 923c2aa3..b61da395 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index b86765f7..48e78292 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index ee5775f5..573e405e 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -15,6 +15,7 @@ */ #include "hybrid/executor/subgraph_executor.h" +#include "graph/ge_context.h" #include "hybrid/executor/worker/task_compile_engine.h" #include "hybrid/executor/worker/execution_engine.h" #include "hybrid/node_executor/node_executor.h" @@ -220,6 +221,7 @@ Status SubgraphExecutor::PrepareNodes() { // only do shape inference and compilation for nodes with dynamic shapes. if (node_item.is_dynamic) { auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { + GetContext().SetSessionId(context_->session_id); GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); return PrepareForExecution(context_, *p_node_state); }); @@ -306,6 +308,7 @@ Status SubgraphExecutor::LaunchTasks() { Status SubgraphExecutor::ScheduleTasks() { GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str()); auto prepare_future = std::async([&]() -> Status { + GetContext().SetSessionId(context_->session_id); auto ret = PrepareNodes(); ready_queue_.Push(nullptr); return ret; diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 11311968..a3e1efb5 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -91,8 +91,8 @@ class HybridModel { GeRootModelPtr ge_root_model_; std::map input_nodes_; std::map constant_op_nodes_; - std::map device_variable_nodes_; //lint !e148 - std::map host_variable_nodes_; //lint !e148 + std::map device_variable_nodes_; + std::map host_variable_nodes_; std::map> variable_tensors_; std::map> task_defs_; std::map known_shape_sub_models_; diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index a740aa7d..4a019487 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -116,10 +116,8 @@ Status NodeItem::Init() { (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); shape_inference_type = static_cast(unknown_shape_type_val); - bool test_is_dynamic = false; - NodeUtils::GetNodeUnknownShapeStatus(*node, test_is_dynamic); (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); - GELOGI("node name = %s, is_dynamic = %d, test_is_dynamic = %d", this->node_name.c_str(), is_dynamic, test_is_dynamic); + GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); if (!is_dynamic) { GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), "[%s] Failed to get shape status.", diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 3c4065ea..675f721c 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index fb1966b4..1d76fde9 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc b/ge/hybrid/node_executor/controlop/control_op_executor.cc index 5f9dde2a..21ef1f43 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "control_op_executor.h" #include "graph/utils/node_utils.h" #include "graph/utils/type_utils.h" diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 0d6f52e8..1d5c6405 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 8aecc3ad..ddf6eb3a 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 3bf71013..71b98ff6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h index bfa24325..1adfcc18 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/node_executor/host_cpu/kernel/kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/kernel.h index 0a9f32b7..def21032 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index ff5a7c6d..47e6e534 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h index 6677ce4a..d2c9eaec 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc index 37b07e37..378de8a3 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h index 30557064..bd714e81 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index 2a836458..db5c0f9c 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h index f20d6221..ad477178 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel_factory.cc b/ge/hybrid/node_executor/host_cpu/kernel_factory.cc index aabae999..83899fa6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel_factory.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel_factory.h b/ge/hybrid/node_executor/host_cpu/kernel_factory.h index d03f12fc..23b74fdd 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel_factory.h +++ b/ge/hybrid/node_executor/host_cpu/kernel_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/inc/graph_pass.h b/ge/inc/graph_pass.h index a8732cb4..642b94ea 100644 --- a/ge/inc/graph_pass.h +++ b/ge/inc/graph_pass.h @@ -1,93 +1,93 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_INC_GRAPH_PASS_H_ -#define GE_INC_GRAPH_PASS_H_ - -#include -#include - -#include "common/op/attr_value_util.h" -#include "common/op/ge_op_utils.h" -#include "common/types.h" -#include "framework/common/debug/ge_log.h" -#include "graph/compute_graph.h" -#include "graph/utils/attr_utils.h" -#include "graph/utils/graph_utils.h" -#include "inc/pass.h" - -namespace ge { -/// -/// @ingroup domi_omg -/// @brief graph pass -/// @author -/// -class GraphPass : public Pass { - public: - /// - /// run graph pass - /// @param [in] graph graph to be optimized - /// @return SUCCESS optimize successfully - /// @return NOT_CHANGED not optimized - /// @return others optimized failed - /// @author - /// - virtual Status Run(ge::ComputeGraphPtr graph) = 0; - virtual Status ClearStatus() { return SUCCESS; }; - static void RecordOriginalNames(std::vector original_nodes, const ge::NodePtr &node) { - GE_CHECK_NOTNULL_JUST_RETURN(node); - std::vector original_names; - for (ge::NodePtr &node_tmp : original_nodes) { - std::vector names_tmp; - ge::OpDescPtr opdesc_tmp = node_tmp->GetOpDesc(); - GE_CHECK_NOTNULL_JUST_RETURN(opdesc_tmp); - Status ret = ge::AttrUtils::GetListStr(opdesc_tmp, "_datadump_original_op_names", names_tmp); - if (ret != domi::SUCCESS) { - GELOGW("get the original_op_names fail."); - } - if (names_tmp.size() != 0) { - original_names.insert(original_names.end(), names_tmp.begin(), names_tmp.end()); - } else { - original_names.push_back(opdesc_tmp->GetName()); - } - } - - if (original_names.size() == 0) { - std::string tmp; - 
original_names.push_back(tmp); - } - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(node->GetOpDesc(), "_datadump_original_op_names", original_names), - return, "Set original_op_names fail."); - } - - static bool IsConstNode(const ge::NodePtr &node) { - GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, GELOGE(FAILED, "Node GetOpDesc is nullptr"); return false); - if (node->GetOpDesc()->GetType() == CONSTANTOP) { - return true; - } else if (node->GetOpDesc()->GetType() == FRAMEWORKOP) { - string type; - GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type), - return false, "Get original_type for op %s fail!", node->GetName().c_str()); - GE_IF_BOOL_EXEC(type == CONSTANT, GELOGI("Is const op"); return true); - return false; - } else { - return false; - } - } -}; -} // namespace ge - -#endif // GE_INC_GRAPH_PASS_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_INC_GRAPH_PASS_H_ +#define GE_INC_GRAPH_PASS_H_ + +#include +#include + +#include "common/op/attr_value_util.h" +#include "common/op/ge_op_utils.h" +#include "common/types.h" +#include "framework/common/debug/ge_log.h" +#include "graph/compute_graph.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/graph_utils.h" +#include "inc/pass.h" + +namespace ge { +/// +/// @ingroup domi_omg +/// @brief graph pass +/// @author +/// +class GraphPass : public Pass { + public: + /// + /// run graph pass + /// @param [in] graph graph to be optimized + /// @return SUCCESS optimize successfully + /// @return NOT_CHANGED not optimized + /// @return others optimized failed + /// @author + /// + virtual Status Run(ge::ComputeGraphPtr graph) = 0; + virtual Status ClearStatus() { return SUCCESS; }; + static void RecordOriginalNames(std::vector original_nodes, const ge::NodePtr &node) { + GE_CHECK_NOTNULL_JUST_RETURN(node); + std::vector original_names; + for (ge::NodePtr &node_tmp : original_nodes) { + std::vector names_tmp; + ge::OpDescPtr opdesc_tmp = node_tmp->GetOpDesc(); + GE_CHECK_NOTNULL_JUST_RETURN(opdesc_tmp); + Status ret = ge::AttrUtils::GetListStr(opdesc_tmp, "_datadump_original_op_names", names_tmp); + if (ret != domi::SUCCESS) { + GELOGW("get the original_op_names fail."); + } + if (names_tmp.size() != 0) { + original_names.insert(original_names.end(), names_tmp.begin(), names_tmp.end()); + } else { + original_names.push_back(opdesc_tmp->GetName()); + } + } + + if (original_names.size() == 0) { + std::string tmp; + original_names.push_back(tmp); + } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(node->GetOpDesc(), "_datadump_original_op_names", original_names), + return, "Set original_op_names fail."); + } + + static bool IsConstNode(const ge::NodePtr &node) { + GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, GELOGE(FAILED, "Node GetOpDesc is nullptr"); return false); + if (node->GetOpDesc()->GetType() == CONSTANTOP) { + return true; + } else if 
(node->GetOpDesc()->GetType() == FRAMEWORKOP) { + string type; + GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type), + return false, "Get original_type for op %s fail!", node->GetName().c_str()); + GE_IF_BOOL_EXEC(type == CONSTANT, GELOGI("Is const op"); return true); + return false; + } else { + return false; + } + } +}; +} // namespace ge + +#endif // GE_INC_GRAPH_PASS_H_ diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index fda21f63..404b99ee 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -60,6 +60,8 @@ static std::shared_ptr instancePtr_ = nullptr; // Initial each module of GE, if one failed, release all Status GELib::Initialize(const map &options) { + + GELOGI("initial start"); GEEVENT("[GEPERFTRACE] GE Init Start"); // Multiple initializations are not allowed @@ -166,8 +168,10 @@ Status GELib::SystemInitialize(const map &options) { } } - // In train and infer, profiling is always needed. InitOptions(options); + + // In train and infer, profiling is always needed. 
+ InitProfiling(this->options_); auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS, @@ -177,21 +181,19 @@ Status GELib::SystemInitialize(const map &options) { // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer // these two case with logical device id if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - InitProfiling(this->options_, true); status = InitSystemWithOptions(this->options_); } else { - InitProfiling(this->options_); status = InitSystemWithoutOptions(); } return status; } -void GELib::InitProfiling(Options &options, bool convert_2_phy_device_id) { +void GELib::InitProfiling(Options &options) { GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id); std::lock_guard lock(status_mutex_); GetContext().Init(); // Profiling init - if (ProfilingManager::Instance().Init(options, convert_2_phy_device_id) != SUCCESS) { + if (ProfilingManager::Instance().Init(options) != SUCCESS) { GELOGW("Profiling init failed."); } } diff --git a/ge/init/gelib.h b/ge/init/gelib.h index cefbaa50..e1200881 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -68,7 +68,7 @@ class GELib { // get incre build cache path const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; } - void InitProfiling(Options &options, bool convert_2_phy_device_id = false); + void InitProfiling(Options &options); void ShutDownProfiling(); Status InitSystemWithoutOptions(); diff --git a/ge/ir_build/atc_ir_common.cc b/ge/ir_build/atc_ir_common.cc index e4bfe978..f14f475e 100755 --- a/ge/ir_build/atc_ir_common.cc +++ b/ge/ir_build/atc_ir_common.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "atc_ir_common.h" #include "common/util/error_manager/error_manager.h" #include "external/ge/ge_api_types.h" @@ -504,7 +505,7 @@ void PrintOptionMap(std::map &options, std::string tip for (auto iter = options.begin(); iter != options.end(); iter++) { std::string key = iter->first; std::string option_name = iter->second; - GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str()); + GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); } } diff --git a/ge/ir_build/atc_ir_common.h b/ge/ir_build/atc_ir_common.h index 47361167..1ab4b9fa 100644 --- a/ge/ir_build/atc_ir_common.h +++ b/ge/ir_build/atc_ir_common.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef FRAMEWORK_DOMI_ATC_IR_COMMON_H_ #define FRAMEWORK_DOMI_ATC_IR_COMMON_H_ diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 544bcc21..e6f20445 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "external/ge/ge_ir_build.h" #include @@ -99,6 +100,13 @@ static graphStatus CheckGlobalOptions(std::map &global return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; + // set precision mode default value + std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == + global_options.end() + ? 
"force_fp16" + : global_options[ge::ir_option::PRECISION_MODE]; + global_options[ge::ir_option::PRECISION_MODE] = precision_mode; + return GRAPH_SUCCESS; } @@ -291,7 +299,7 @@ graphStatus Impl::Init(const std::map &options) { } void Impl::SetRtSocVersion() { - auto &global_options = GetMutableGlobalOptions(); + const auto &global_options = GetMutableGlobalOptions(); auto it = global_options.find(ge::SOC_VERSION); if (it != global_options.end()) { const char *soc_version = it->second.c_str(); diff --git a/ge/model/ge_model.cc b/ge/model/ge_model.cc index eb6ca158..acaeff0d 100755 --- a/ge/model/ge_model.cc +++ b/ge/model/ge_model.cc @@ -23,6 +23,7 @@ namespace ge { void GeModel::Init() { (void)AttrUtils::SetInt(this, ATTR_MODEL_MEMORY_SIZE, 0); + (void)AttrUtils::SetInt(this, ATTR_MODEL_P2P_MEMORY_SIZE, 0); (void)AttrUtils::SetInt(this, ATTR_MODEL_STREAM_NUM, 0); (void)AttrUtils::SetInt(this, ATTR_MODEL_EVENT_NUM, 0); (void)AttrUtils::SetInt(this, ATTR_MODEL_LABEL_NUM, 0); diff --git a/ge/model/ge_model.h b/ge/model/ge_model.h index 5676c3b6..beb054ba 100755 --- a/ge/model/ge_model.h +++ b/ge/model/ge_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/model/ge_root_model.cc b/ge/model/ge_root_model.cc index 68f868dd..aee119fa 100644 --- a/ge/model/ge_root_model.cc +++ b/ge/model/ge_root_model.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/model/ge_root_model.h b/ge/model/ge_root_model.h index 53174064..f4a120bb 100755 --- a/ge/model/ge_root_model.h +++ b/ge/model/ge_root_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include #include "graph/compute_graph.h" #include "model/ge_model.h" diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc new file mode 100644 index 00000000..5ebc112d --- /dev/null +++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -0,0 +1,165 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "init/gelib.h" +#include "ops_kernel_builder_manager.h" +#include "register/ops_kernel_builder_registry.h" + +namespace ge { +namespace { +const std::vector kBasicBuilderLibs = { + "libge_local_opskernel_builder.so", + "libhost_cpu_opskernel_builder.so", + "librts_kernel_builder.so", + "libaicpu_builder.so", + "libaicpu_tf_builder.so" +}; + +const std::vector kHcclBuilderLibs = { + "libhcom_opskernel_builder.so", + "libhvd_opskernel_builder.so" +}; +} // namespace +OpsKernelBuilderManager::~OpsKernelBuilderManager() { + // it's OK to call Finalize multiply times + (void) Finalize(); +} + +OpsKernelBuilderManager &OpsKernelBuilderManager::Instance() { + static OpsKernelBuilderManager instance; + return instance; +} + +Status OpsKernelBuilderManager::Initialize(const map &options) { + std::string lib_paths; + GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths)); + GE_CHK_STATUS_RET(plugin_manager_.LoadSo(lib_paths), "Failed to load libs"); + + auto &kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll(); + GELOGI("Number of OpBuild = %zu", kernel_builders.size()); + + for (const auto &it : kernel_builders) { + const std::string &kernel_lib_name = it.first; + GELOGI("Initialize ops kernel util for %s", kernel_lib_name.c_str()); + GE_CHECK_NOTNULL(it.second); + GE_CHK_STATUS_RET(it.second->Initialize(options), + "Failed to invoke Initialize, kernel lib name = %s", + kernel_lib_name.c_str()); + + ops_kernel_builders_.emplace(kernel_lib_name, it.second); + } + + return SUCCESS; +} + +Status OpsKernelBuilderManager::Finalize() { + OpsKernelBuilderRegistry::GetInstance().UnregisterAll(); + for (const auto &it : ops_kernel_builders_) { + const std::string &kernel_lib_name = it.first; + GELOGI("Finalize ops kernel util for %s", kernel_lib_name.c_str()); + auto ret = it.second->Finalize(); + if (ret != SUCCESS) { + GELOGW("Failed to invoke Finalize, kernel lib name = %s", + kernel_lib_name.c_str()); + } + } + + ops_kernel_builders_.clear(); + 
return SUCCESS; +} + +const map &OpsKernelBuilderManager::GetAllOpsKernelBuilders() const { + return ops_kernel_builders_; +} + +OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &name) const { + auto it = ops_kernel_builders_.find(name); + if (it != ops_kernel_builders_.end()) { + return it->second; + } + + GELOGW("Failed to get opsKernelInfoStore object by name. OpKernelLibName is %s", name.c_str()); + return nullptr; +} + +Status OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths) { + GELOGD("Start to execute GetLibPaths"); + std::string path_base = PluginManager::GetPath(); + std::string so_path = "plugin/opskernel/"; + std::string path = path_base + so_path; + std::string all_lib_paths; + for (const auto &lib_name : kBasicBuilderLibs) { + all_lib_paths += (path + lib_name + ":"); + } + + auto iter = options.find(OPTION_EXEC_HCCL_FLAG); + if (iter == options.end() || iter->second != "0") { + for (const auto &lib_name : kHcclBuilderLibs) { + all_lib_paths += (path + lib_name + ":"); + } + } + + lib_paths = std::move(all_lib_paths); + GELOGI("Get lib paths by default. paths = %s", lib_paths.c_str()); + return SUCCESS; +} + +Status OpsKernelBuilderManager::CalcOpRunningParam(Node &node) const { + auto op_desc = node.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const std::string &lib_name = op_desc->GetOpKernelLibName(); + auto it = ops_kernel_builders_.find(lib_name); + if (it == ops_kernel_builders_.end()) { + GELOGE(INTERNAL_ERROR, + "Failed to get OpKernelStore. 
libName = %s, node = %s", + lib_name.c_str(), + op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGD("To invoke CalcOpRunningParam, node = %s, lib name = %s", op_desc->GetName().c_str(), lib_name.c_str()); + GE_CHK_STATUS_RET(it->second->CalcOpRunningParam(node), + "Failed to invoke CalcOpRunningParam, libName = %s, node = %s", + lib_name.c_str(), + op_desc->GetName().c_str()); + GELOGD("Done invoking CalcOpRunningParam successfully"); + return SUCCESS; +} + +Status OpsKernelBuilderManager::GenerateTask(const Node &node, + RunContext &context, + std::vector &tasks) const { + auto op_desc = node.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const std::string &lib_name = op_desc->GetOpKernelLibName(); + auto it = ops_kernel_builders_.find(lib_name); + if (it == ops_kernel_builders_.end()) { + GELOGE(INTERNAL_ERROR, + "Failed to get OpKernelStore. libName = %s, node = %s", + lib_name.c_str(), + op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGD("To invoke GenerateTask, node = %s, lib name = %s", op_desc->GetName().c_str(), lib_name.c_str()); + GE_CHK_STATUS_RET(it->second->GenerateTask(node, context, tasks), + "Failed to invoke GenerateTask, libName = %s, node = %s", + lib_name.c_str(), + op_desc->GetName().c_str()); + GELOGD("Done invoking GenerateTask successfully"); + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h new file mode 100644 index 00000000..597ddd03 --- /dev/null +++ b/ge/opskernel_manager/ops_kernel_builder_manager.h @@ -0,0 +1,57 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_OPSKERNEL_MANAGER_OPS_KERNEL_BUILDER_MANAGER_H_ +#define GE_OPSKERNEL_MANAGER_OPS_KERNEL_BUILDER_MANAGER_H_ + +#include "common/ge/plugin_manager.h" +#include "common/opskernel/ops_kernel_builder.h" +#include "external/ge/ge_api_error_codes.h" + +namespace ge { +using OpsKernelBuilderPtr = std::shared_ptr; +class OpsKernelBuilderManager { + public: + ~OpsKernelBuilderManager(); + + static OpsKernelBuilderManager& Instance(); + + // opsKernelManager initialize, load all opsKernelInfoStore and graph_optimizer + Status Initialize(const std::map &options); + + // opsKernelManager finalize, unload all opsKernelInfoStore and graph_optimizer + Status Finalize(); + + // get opsKernelIBuilder by name + OpsKernelBuilderPtr GetOpsKernelBuilder(const std::string &name) const; + + // get all opsKernelBuilders + const std::map &GetAllOpsKernelBuilders() const; + + Status CalcOpRunningParam(Node &node) const; + + Status GenerateTask(const Node &node, RunContext &context, + std::vector &tasks) const; + + private: + OpsKernelBuilderManager() = default; + static Status GetLibPaths(const std::map &options, std::string &lib_paths); + + PluginManager plugin_manager_; + std::map ops_kernel_builders_{}; +}; +} // namespace ge +#endif // GE_OPSKERNEL_MANAGER_OPS_KERNEL_BUILDER_MANAGER_H_ diff --git a/ge/opskernel_manager/ops_kernel_manager.cc b/ge/opskernel_manager/ops_kernel_manager.cc index e810b1de..12894292 100644 --- a/ge/opskernel_manager/ops_kernel_manager.cc +++ b/ge/opskernel_manager/ops_kernel_manager.cc @@ -175,25 +175,25 @@ Status 
OpsKernelManager::ParsePluginOptions(const map &options, } else if (flag == 1) { enable_flag = true; } else { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } catch (std::invalid_argument &) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (std::out_of_range &) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (...) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } else { - GELOGI("Not find key %s, set to default value false.", plugin_name.c_str()); + GELOGI("Not find option_key %s, set to default value false.", plugin_name.c_str()); enable_flag = false; } diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 3e765fa1..afe961ba 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" +#include "graph/common/local_context.h" #include "graph/load/new_model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/tensor_adapter.h" @@ -56,7 +57,7 @@ Status CheckReuseMemoryOption(const std::map &options) { static std::mutex mutex_; // BuildGraph and RunGraph use bool InnerSession::is_dump_server_inited_ = false; InnerSession::InnerSession(uint64_t session_id, const std::map &options) - : init_flag_(false), session_id_(session_id), options_(options), graph_manager_(domi::GetContext()) {} + : init_flag_(false), session_id_(session_id), options_(options) {} Status InnerSession::Initialize() { if (init_flag_) { @@ -155,7 +156,7 @@ Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph, return GE_SESS_INIT_FAILED; } UpdateThreadContext(options); - Status ret = graph_manager_.AddGraph(graph_id, graph, options); + Status ret = graph_manager_.AddGraph(graph_id, graph, options, domi::GetContext()); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] add graph %u failed.", session_id_, graph_id); return ret; @@ -279,6 +280,7 @@ void InnerSession::UpdateThreadContext(const std::map GetThreadLocalContext().SetSessionOption(options_); GetThreadLocalContext().SetGraphOption(options); GetContext().SetSessionId(session_id_); + SetRtSocVersion(); } void InnerSession::UpdateThreadContext(uint32_t graph_id) { @@ -332,4 +334,17 @@ Status InnerSession::RemoveDumpProperties() { } return SUCCESS; } + +void InnerSession::SetRtSocVersion() { + const auto &global_options = GetMutableGlobalOptions(); + auto it = global_options.find(ge::SOC_VERSION); + if (it != global_options.end()) { + const char *soc_version = it->second.c_str(); + rtError_t rt_ret = rtSetSocVersion(soc_version); + if (rt_ret != RT_ERROR_NONE) { + GELOGW("Set soc version %s failed. 
ret:0x%X", soc_version, rt_ret); + } + GELOGI("Set soc version %s success.", soc_version); + } +} } // namespace ge diff --git a/ge/session/inner_session.h b/ge/session/inner_session.h index 3d1a8059..60b9577d 100644 --- a/ge/session/inner_session.h +++ b/ge/session/inner_session.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,6 +68,8 @@ class InnerSession { Status RemoveDumpProperties(); + void SetRtSocVersion(); + private: bool init_flag_; uint64_t session_id_; diff --git a/ge/session/omg.cc b/ge/session/omg.cc index e90b4635..12079791 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -427,6 +427,32 @@ Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) { } return domi::SUCCESS; } +Status GetDefaultOutInfo(ge::ComputeGraphPtr &compute_graph, + std::vector> &output_nodes_info) { + std::vector> default_out_nodes = domi::GetContext().default_out_nodes; + if (domi::GetContext().type == domi::CAFFE && !default_out_nodes.empty()) { + for (uint32_t i = 0; i < default_out_nodes.size(); ++i) { + ge::NodePtr out_node = compute_graph->FindNode(default_out_nodes[i].first); + if (out_node == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage("E10016", {"parameter", "opname"}, + {"out_nodes", default_out_nodes[i].first}); + GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", default_out_nodes[i].first.c_str()); + return domi::FAILED; + } + output_nodes_info.push_back(std::make_pair(out_node, default_out_nodes[i].second)); + GELOGD("Get default output node:%s.", out_node->GetName().c_str()); + } + return domi::SUCCESS; + } + + for (ge::NodePtr node : compute_graph->GetDirectNode()) { + if (!node->GetInAllNodes().empty() && node->GetOutAllNodes().empty()) { + Status ret = GetOutputLeaf(node, output_nodes_info); + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); + } + } + return domi::SUCCESS; +} Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output) { ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph); @@ -477,11 +503,9 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const } // default output node (leaf) if (user_out_nodes.empty()) { - for (ge::NodePtr node : compute_graph->GetDirectNode()) { - if (!node->GetInAllNodes().empty() && node->GetOutAllNodes().empty()) { - Status ret = GetOutputLeaf(node, output_nodes_info); - GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); - } + if (GetDefaultOutInfo(compute_graph, output_nodes_info) != SUCCESS) { + GELOGE(domi::FAILED, "Get default output info failed."); + 
return domi::FAILED; } } GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name); @@ -525,6 +549,7 @@ Status GetOutputLeaf(NodePtr node, std::vector> if (node->GetType() != NETOUTPUT) { for (size_t index = 0; index < size; ++index) { output_nodes_info.push_back(std::make_pair(node, index)); + GELOGD("Get output leaf node:%s.", node->GetName().c_str()); } } else { const auto in_anchors = node->GetAllInDataAnchors(); @@ -853,65 +878,66 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con uint8_t *model_data = nullptr; uint32_t model_len = 0; - - // Parse the contents of the file to get the modeldef object - ret = ModelParserBase::ParseModelContent(model, model_data, model_len); - if (ret == SUCCESS) { - OmFileLoadHelper omFileLoadHelper; - ge::graphStatus status = omFileLoadHelper.Init(model_data, model_len); - if (status != ge::GRAPH_SUCCESS) { - GELOGE(ge::FAILED, "Om file init failed."); - if (model.model_data != nullptr) { - delete[](char *) model.model_data; - model.model_data = nullptr; + try { + // Parse the contents of the file to get the modeldef object + ret = ModelParserBase::ParseModelContent(model, model_data, model_len); + if (ret == SUCCESS) { + OmFileLoadHelper omFileLoadHelper; + ge::graphStatus status = omFileLoadHelper.Init(model_data, model_len); + if (status != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "Om file init failed."); + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; + } + return status; } - return status; - } - ModelPartition ir_part; - status = omFileLoadHelper.GetModelPartition(MODEL_DEF, ir_part); - if (status != ge::GRAPH_SUCCESS) { - GELOGE(ge::FAILED, "Get model part failed."); - if (model.model_data != nullptr) { - delete[](char *) model.model_data; - model.model_data = nullptr; + ModelPartition ir_part; + status = omFileLoadHelper.GetModelPartition(MODEL_DEF, ir_part); + if (status != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "Get model 
part failed."); + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; + } + return status; } - return status; - } - ge::proto::ModelDef model_def; + ge::proto::ModelDef model_def; - // De serialization - bool flag = ReadProtoFromArray(ir_part.data, ir_part.size, &model_def); - if (flag) { - GetGroupName(model_def); + // De serialization + bool flag = ReadProtoFromArray(ir_part.data, ir_part.size, &model_def); + if (flag) { + GetGroupName(model_def); - json j; - Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); + json j; + Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); - ret = ModelSaver::SaveJsonToFile(json_file, j); + ret = ModelSaver::SaveJsonToFile(json_file, j); + } else { + ret = INTERNAL_ERROR; + GELOGE(ret, "ReadProtoFromArray failed."); + } } else { - ret = INTERNAL_ERROR; - GELOGE(ret, "ReadProtoFromArray failed."); + GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); } - } else { - GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. 
Please check --om param."); - } - if (model.model_data != nullptr) { - delete[](char *) model.model_data; - model.model_data = nullptr; + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; + } + return ret; + } catch (const std::exception &e) { + GELOGE(FAILED, "Convert om model to json failed, exception message : %s.", e.what()); + return FAILED; } - - return ret; } FMK_FUNC_HOST_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char *json_file) { ge::ModelData model; - // Mode 2 does not need to verify the priority, and a default value of 0 is passed int32_t priority = 0; - // Load model from file Status ret = ModelParserBase::LoadFromFile(model_file, "", priority, model); auto free_model_data = [](void **ptr) -> void { @@ -925,35 +951,36 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const GELOGE(ret, "LoadFromFile failed."); return ret; } - bool flag = false; - ge::proto::ModelDef model_def; + try { + bool flag = false; + ge::proto::ModelDef model_def; flag = google::protobuf::TextFormat::ParseFromString(reinterpret_cast(model.model_data), &model_def); + + if (!flag) { + free_model_data(&model.model_data); + GELOGE(FAILED, "ParseFromString fail."); + return FAILED; + } + GetGroupName(model_def); + json j; + Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); + ret = ModelSaver::SaveJsonToFile(json_file, j); + if (ret != SUCCESS) { + free_model_data(&model.model_data); + GELOGE(ret, "Save json to file fail."); + return ret; + } + free_model_data(&model.model_data); + return SUCCESS; } catch (google::protobuf::FatalException &e) { free_model_data(&model.model_data); GELOGE(FAILED, "ParseFromString fail. 
exception message : %s", e.what()); return FAILED; - } - - if (!flag) { - free_model_data(&model.model_data); - GELOGE(FAILED, "ParseFromString fail."); + } catch (const std::exception &e) { + GELOGE(FAILED, "Convert pbtxt to json failed, exception message : %s.", e.what()); return FAILED; } - - GetGroupName(model_def); - json j; - Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); - ret = ModelSaver::SaveJsonToFile(json_file, j); - if (ret != SUCCESS) { - free_model_data(&model.model_data); - GELOGE(ret, "Save json to file fail."); - return ret; - } - - free_model_data(&model.model_data); - - return SUCCESS; } FMK_FUNC_HOST_VISIBILITY Status ConvertFwkModelToJson(const domi::FrameworkType framework, const char *model_file, @@ -1010,6 +1037,7 @@ void UpdateOmgCtxWithParserCtx() { domi::GetContext().input_nodes_format_map = GetParserContext().input_nodes_format_map; domi::GetContext().out_top_names = GetParserContext().out_top_names; domi::GetContext().user_out_nodes_top_vec = GetParserContext().user_out_nodes_top_vec; + domi::GetContext().default_out_nodes = GetParserContext().default_out_nodes; } void UpdateParserCtxWithOmgCtx() { diff --git a/ge/session/readme.txt b/ge/session/readme.txt deleted file mode 100644 index d8d0f393..00000000 --- a/ge/session/readme.txt +++ /dev/null @@ -1,3 +0,0 @@ -GE -SessionManager -InnerSession diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 6f8c9432..ed215fa1 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/session/session_manager.h b/ge/session/session_manager.h index 88864f61..1a385c71 100644 --- a/ge/session/session_manager.h +++ b/ge/session/session_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 647f1618..bef3a558 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -295,7 +295,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, if (op_task_->GetOpTaskType() == OP_TASK_TBE) { return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { - return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_); + return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); } else { GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index a70ae91d..62526d63 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #include "single_op/task/aicpu_task_builder.h" #include diff --git a/ge/single_op/task/aicpu_task_builder.h b/ge/single_op/task/aicpu_task_builder.h index 6dcd7a0f..b212cb4f 100755 --- a/ge/single_op/task/aicpu_task_builder.h +++ b/ge/single_op/task/aicpu_task_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ #define GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index f778f189..b138983a 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -34,6 +34,11 @@ constexpr int kLaunchRetryTimes = 1000; constexpr int kSleepTime = 10; constexpr uint64_t kReleaseFlag = 1; constexpr int kCopyNum = 2; +void FreeHbm(void *var) { + if (var) { + (void)rtFree(var); + } +} } Status OpTask::OpenDump(const std::vector &io_addr, rtStream_t stream) { @@ -343,49 +348,23 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor } AiCpuTask::~AiCpuTask() { - if (args_ != nullptr) { - (void)rtFree(args_); - } - - if (io_addr_ != nullptr) { - (void)rtFree(io_addr_); - } - - if (dynamic_flag_ && workspace_addr_ != nullptr) { - (void)rtFree(workspace_addr_); - } - if (copy_workspace_buf_ != nullptr) { - (void)rtFree(copy_workspace_buf_); - } - - if (copy_ioaddr_dev_ != nullptr) { - (void)rtFree(copy_ioaddr_dev_); - } - - if (copy_input_release_flag_dev_ != nullptr) { - (void)rtFree(copy_input_release_flag_dev_); - } - - if (copy_input_data_size_dev_ != nullptr) { - (void)rtFree(copy_input_data_size_dev_); - 
} - - if (copy_input_src_dev_ != nullptr) { - (void)rtFree(copy_input_src_dev_); - } - - if (copy_input_dst_dev_ != nullptr) { - (void)rtFree(copy_input_dst_dev_); - } - - if (copy_task_args_buf_ != nullptr) { - (void)rtFree(copy_task_args_buf_); - } - + FreeHbm(args_); + FreeHbm(io_addr_); + if (dynamic_flag_) { + FreeHbm(workspace_addr_); + } + FreeHbm(copy_workspace_buf_); + FreeHbm(copy_ioaddr_dev_); + FreeHbm(copy_input_release_flag_dev_); + FreeHbm(copy_input_data_size_dev_); + FreeHbm(copy_input_src_dev_); + FreeHbm(copy_input_dst_dev_); + FreeHbm(copy_task_args_buf_); for (auto summary : output_summary_) { - if (summary != nullptr) { - (void)rtFree(summary); - } + FreeHbm(summary); + } + for (auto out_shape : out_shape_hbm_) { + FreeHbm(out_shape); } } @@ -412,8 +391,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status AiCpuTask::PrepareCopyInputs(vector &outputs, - const std::vector &out_shape_hbm) { +Status AiCpuTask::PrepareCopyInputs(vector &outputs) { std::vector copy_input_release_flag; std::vector copy_input_data_size; std::vector copy_input_src; @@ -426,11 +404,15 @@ Status AiCpuTask::PrepareCopyInputs(vector &outputs, summary.raw_data_ptr, summary.raw_data_size); auto output = outputs[i]; copy_input_release_flag.emplace_back(kReleaseFlag); - copy_input_data_size.emplace_back(summary.raw_data_size); + if (summary.raw_data_size > 0) { + copy_input_data_size.emplace_back(output.length); + } else { + copy_input_data_size.emplace_back(summary.raw_data_size); + } copy_input_src.emplace_back(summary.raw_data_ptr); - copy_input_dst.emplace_back(reinterpret_cast(output)); + copy_input_dst.emplace_back(reinterpret_cast(output.data)); - const auto &shape_buffer = out_shape_hbm[i]; + const auto &shape_buffer = out_shape_hbm_[i]; copy_input_release_flag.emplace_back(kReleaseFlag); copy_input_data_size.emplace_back(summary.shape_data_size); copy_input_src.emplace_back(summary.shape_data_ptr); @@ -450,7 +432,7 @@ Status 
AiCpuTask::PrepareCopyInputs(vector &outputs, return SUCCESS; } -Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector &out_shape_hbm) { +Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { for (size_t i = 0; i < num_outputs_; ++i) { auto &result_summary = output_summary_host_[i]; @@ -459,17 +441,17 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector &out_sha RT_MEMCPY_DEVICE_TO_HOST)); auto shape_data_size = result_summary.shape_data_size; void *shape_buffer = nullptr; - GE_MAKE_GUARD_RTMEM(shape_buffer); - GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); - out_shape_hbm.emplace_back(shape_buffer); + if (shape_data_size > 0) { + GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); + } + out_shape_hbm_.emplace_back(shape_buffer); } return SUCCESS; } -Status AiCpuTask::CopyDataToHbm(vector &outputs, - const std::vector &out_shape_hbm, +Status AiCpuTask::CopyDataToHbm(vector &outputs, rtStream_t stream) { - GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs, out_shape_hbm)); + GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream)); @@ -477,22 +459,23 @@ Status AiCpuTask::CopyDataToHbm(vector &outputs, return SUCCESS; } -Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, - const std::vector &out_shape_hbm) { +Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc) { for (size_t i = 0; i < num_outputs_; ++i) { const auto &result_summary = output_summary_host_[i]; std::vector shape_dims; - const auto &shape_hbm = out_shape_hbm[i]; - - uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); - std::unique_ptr shape_addr(new(std::nothrow) int64_t[dim_num]()); - GE_CHECK_NOTNULL(shape_addr); - GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, - shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); - - for (uint32_t dim_idx = 0; 
dim_idx < dim_num; ++dim_idx) { - shape_dims.emplace_back(shape_addr[dim_idx]); - GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); + if (result_summary.shape_data_size > 0) { + const auto &shape_hbm = out_shape_hbm_[i]; + + uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); + std::unique_ptr shape_addr(new(std::nothrow) int64_t[dim_num]()); + GE_CHECK_NOTNULL(shape_addr); + GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, + shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); + + for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { + shape_dims.emplace_back(shape_addr[dim_idx]); + GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); + } } GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), @@ -502,7 +485,8 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, } Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, - vector &outputs, rtStream_t stream) { + vector &outputs, + rtStream_t stream) { if (num_outputs_ == 0) { GELOGI("Output num is 0, there is no need to update the output and size."); return SUCCESS; @@ -510,16 +494,24 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output GELOGI("Update shape and data by result summary begin."); - std::vector out_shape_hbm; - GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm), + for (auto out_shape : out_shape_hbm_) { + FreeHbm(out_shape); + } + out_shape_hbm_.clear(); + GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), "Read ResultSummary and update output shape failed."); - GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), + GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), "Copy data to output failed."); - GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), + GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), "Update shape by hbm buffer failed."); + for (auto out_shape : 
out_shape_hbm_) { + FreeHbm(out_shape); + } + out_shape_hbm_.clear(); + GELOGI("Update shape and data by result summary end."); return SUCCESS; } @@ -624,11 +616,19 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { } Status AiCpuTask::LaunchKernel(const std::vector &input_desc, - const std::vector &inputs, + const std::vector &input_buffers, std::vector &output_desc, - std::vector &outputs, + std::vector &output_buffers, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); + std::vector inputs; + std::vector outputs; + for (auto &buffer : input_buffers) { + inputs.emplace_back(buffer.data); + } + for (auto &buffer : output_buffers) { + outputs.emplace_back(buffer.data); + } GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); GE_CHK_RT_RET(rtStreamSynchronize(stream)); @@ -636,7 +636,7 @@ Status AiCpuTask::LaunchKernel(const std::vector &input_desc, if (unknown_type_ == DEPEND_SHAPE_RANGE) { GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); } else if (unknown_type_ == DEPEND_COMPUTE) { - GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, outputs, stream)); + GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, output_buffers, stream)); } return SUCCESS; @@ -682,9 +682,9 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { } Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, - const std::vector &inputs, + const std::vector &input_buffers, std::vector &output_desc, - std::vector &outputs, + std::vector &output_buffers, rtStream_t stream) { GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", @@ -695,11 +695,11 @@ Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, size_t arg_index = 0; auto *task_io_addr = reinterpret_cast(io_addr_); GE_CHECK_NOTNULL(task_io_addr); - for (auto &input : inputs) { - 
task_io_addr[arg_index++] = reinterpret_cast(input); + for (auto &input : input_buffers) { + task_io_addr[arg_index++] = reinterpret_cast(input.data); } - for (auto &output : outputs) { - task_io_addr[arg_index++] = reinterpret_cast(output); + for (auto &output : output_buffers) { + task_io_addr[arg_index++] = reinterpret_cast(output.data); } GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index e541426b..57be92ef 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -61,9 +61,9 @@ class OpTask { const OpDescPtr &GetOpdesc() const {return op_desc_;} Status OpenDump(const std::vector &io_addr, rtStream_t stream); virtual Status LaunchKernel(const std::vector &input_desc, - const std::vector &inputs, + const std::vector &input_buffers, std::vector &output_desc, - std::vector &outputs, + std::vector &output_buffers, rtStream_t stream) { return UNSUPPORTED; } @@ -155,10 +155,10 @@ class AiCpuTask : public AiCpuBaseTask { const void *GetIOAddr() const override; Status LaunchKernel(const std::vector &input_desc, - const std::vector &inputs, + const std::vector &input_buffers, std::vector &output_desc, - std::vector &outputs, - rtStream_t stream) override; + std::vector &output_buffers, + rtStream_t stream) override; Status SetMemCopyTask(const domi::KernelExDef &kernel_def); private: @@ -167,16 +167,14 @@ class AiCpuTask : public AiCpuBaseTask { // for copy task. 
Status InitForSummaryAndCopy(); Status UpdateShapeAndDataByResultSummary(vector &output_desc, - vector &outputs, + vector &outputs, rtStream_t stream); - Status ReadResultSummaryAndPrepareMemory(std::vector &out_shape_hbm); + Status ReadResultSummaryAndPrepareMemory(); - Status CopyDataToHbm(vector &outputs, const std::vector &out_shape_hbm, rtStream_t stream); - Status PrepareCopyInputs(vector &outputs, - const std::vector &out_shape_hbm); + Status CopyDataToHbm(vector &outputs, rtStream_t stream); + Status PrepareCopyInputs(vector &outputs); - Status UpdateShapeByHbmBuffer(vector &output_desc, - const std::vector &out_shape_hbm); + Status UpdateShapeByHbmBuffer(vector &output_desc); friend class AiCpuTaskBuilder; void *workspace_addr_ = nullptr; @@ -200,6 +198,8 @@ class AiCpuTask : public AiCpuBaseTask { void *copy_input_data_size_dev_; void *copy_input_src_dev_; void *copy_input_dst_dev_; + + vector out_shape_hbm_; }; class AiCpuCCTask : public AiCpuBaseTask { @@ -220,9 +220,9 @@ class AiCpuCCTask : public AiCpuBaseTask { size_t GetArgSize() const; Status LaunchKernel(const std::vector &input_desc, - const std::vector &inputs, + const std::vector &input_buffers, std::vector &output_desc, - std::vector &outputs, + std::vector &output_buffers, rtStream_t stream) override; private: diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py index 0c5e712b..f2a6a287 100644 --- a/ge/stub/gen_stubapi.py +++ b/ge/stub/gen_stubapi.py @@ -102,7 +102,7 @@ pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find ([a-zA-Z~_] # void int likely .* [)] #we find ) -(?!.*{) # we do not want the case int abc() const { return 1;} +(?!.*{) # we do not want the case int abc() const .*) (;.*) #we want to find ; and after for we will replace these later \n$ diff --git a/inc/common/blocking_queue.h b/inc/common/blocking_queue.h deleted file mode 100644 index 12b02773..00000000 --- a/inc/common/blocking_queue.h +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Copyright 
2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_COMMON_BLOCKING_QUEUE_H_ -#define INC_COMMON_BLOCKING_QUEUE_H_ - -#include -#include -#include -#include - -static const int kDefaultMaxQueueSize = 2048; - -template -class BlockingQueue { - public: - explicit BlockingQueue(uint32_t max_size = kDefaultMaxQueueSize) : max_size_(max_size), is_stoped_(false) {} - - ~BlockingQueue() {} - - bool Pop(T &item) { - std::unique_lock lock(mutex_); - - while (queue_.empty() && !is_stoped_) { - empty_cond_.wait(lock); - } - - if (is_stoped_) { - return false; - } - - item = std::move(queue_.front()); - queue_.pop_front(); - - full_cond_.notify_one(); - - return true; - } - - bool Push(const T &item, bool is_wait = true) { - std::unique_lock lock(mutex_); - - while (queue_.size() >= max_size_ && !is_stoped_) { - if (!is_wait) { - return false; - } - full_cond_.wait(lock); - } - - if (is_stoped_) { - return false; - } - - queue_.push_back(item); - - empty_cond_.notify_one(); - - return true; - } - - bool Push(T &&item, bool is_wait = true) { - std::unique_lock lock(mutex_); - - while (queue_.size() >= max_size_ && !is_stoped_) { - if (!is_wait) { - return false; - } - full_cond_.wait(lock); - } - - if (is_stoped_) { - return false; - } - - queue_.emplace_back(std::move(item)); - - empty_cond_.notify_one(); - - return true; - } - - void Stop() { - { - std::unique_lock lock(mutex_); - is_stoped_ = true; - } - - 
full_cond_.notify_all(); - empty_cond_.notify_all(); - } - - void Restart() { - std::unique_lock lock(mutex_); - is_stoped_ = false; - } - - // if the queue is stoped ,need call this function to release the unprocessed items - std::list GetRemainItems() { - std::unique_lock lock(mutex_); - - if (!is_stoped_) { - return std::list(); - } - - return queue_; - } - - bool IsFull() { - std::unique_lock lock(mutex_); - return queue_.size() >= max_size_; - } - - void Clear() { - std::unique_lock lock(mutex_); - queue_.clear(); - } - - private: - std::list queue_; - std::mutex mutex_; - std::condition_variable empty_cond_; - std::condition_variable full_cond_; - uint32_t max_size_; - - bool is_stoped_; -}; - -#endif // INC_COMMON_BLOCKING_QUEUE_H_ diff --git a/inc/common/dynamic_aipp.h b/inc/common/dynamic_aipp.h deleted file mode 100644 index a687853f..00000000 --- a/inc/common/dynamic_aipp.h +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_COMMON_DYNAMIC_AIPP_H_ -#define INC_COMMON_DYNAMIC_AIPP_H_ - -#include - -/** - * @ingroup dnn - * @brief struct define of dynamic aipp batch parameter. 
- */ -typedef struct tagAippDynamicBatchPara { - int8_t cropSwitch; // crop switch - int8_t scfSwitch; // resize switch - int8_t paddingSwitch; // 0: unable padding - // 1: padding config value,sfr_filling_hblank_ch0 ~ sfr_filling_hblank_ch2 - // 2: padding source picture data, single row/collumn copy - // 3: padding source picture data, block copy - // 4: padding source picture data, mirror copy - int8_t rotateSwitch; // rotate switch,0: non-ratate, - // 1: ratate 90° clockwise,2: ratate 180° clockwise,3: ratate 270° clockwise - int8_t reserve[4]; - int32_t cropStartPosW; // the start horizontal position of cropping - int32_t cropStartPosH; // the start vertical position of cropping - int32_t cropSizeW; // crop width - int32_t cropSizeH; // crop height - - int32_t scfInputSizeW; // input width of scf - int32_t scfInputSizeH; // input height of scf - int32_t scfOutputSizeW; // output width of scf - int32_t scfOutputSizeH; // output height of scf - - int32_t paddingSizeTop; // top padding size - int32_t paddingSizeBottom; // bottom padding size - int32_t paddingSizeLeft; // left padding size - int32_t paddingSizeRight; // right padding size - - int16_t dtcPixelMeanChn0; // mean value of channel 0 - int16_t dtcPixelMeanChn1; // mean value of channel 1 - int16_t dtcPixelMeanChn2; // mean value of channel 2 - int16_t dtcPixelMeanChn3; // mean value of channel 3 - - uint16_t dtcPixelMinChn0; // min value of channel 0 - uint16_t dtcPixelMinChn1; // min value of channel 1 - uint16_t dtcPixelMinChn2; // min value of channel 2 - uint16_t dtcPixelMinChn3; // min value of channel 3 - uint16_t dtcPixelVarReciChn0; // sfr_dtc_pixel_variance_reci_ch0 - uint16_t dtcPixelVarReciChn1; // sfr_dtc_pixel_variance_reci_ch1 - uint16_t dtcPixelVarReciChn2; // sfr_dtc_pixel_variance_reci_ch2 - uint16_t dtcPixelVarReciChn3; // sfr_dtc_pixel_variance_reci_ch3 - - int8_t reserve1[16]; // 32B assign, for ub copy -} kAippDynamicBatchPara; - -/** - * @ingroup dnn - * @brief struct define of 
dynamic aipp parameter. lite:64+96*batchNum byte ; tiny:64+64*batchNum byte - */ -typedef struct tagAippDynamicPara { - uint8_t inputFormat; // input format:YUV420SP_U8/XRGB8888_U8/RGB888_U8 - int8_t cscSwitch; // csc switch - int8_t rbuvSwapSwitch; // rb/ub swap switch - int8_t axSwapSwitch; // RGBA->ARGB, YUVA->AYUV swap switch - int8_t batchNum; // batch parameter number - int8_t reserve1[3]; - int32_t srcImageSizeW; // source image width - int32_t srcImageSizeH; // source image height - int16_t cscMatrixR0C0; // csc_matrix_r0_c0 - int16_t cscMatrixR0C1; // csc_matrix_r0_c1 - int16_t cscMatrixR0C2; // csc_matrix_r0_c2 - int16_t cscMatrixR1C0; // csc_matrix_r1_c0 - int16_t cscMatrixR1C1; // csc_matrix_r1_c1 - int16_t cscMatrixR1C2; // csc_matrix_r1_c2 - int16_t cscMatrixR2C0; // csc_matrix_r2_c0 - int16_t cscMatrixR2C1; // csc_matrix_r2_c1 - int16_t cscMatrixR2C2; // csc_matrix_r2_c2 - int16_t reserve2[3]; - uint8_t cscOutputBiasR0; // output Bias for RGB to YUV, element of row 0, unsigned number - uint8_t cscOutputBiasR1; // output Bias for RGB to YUV, element of row 1, unsigned number - uint8_t cscOutputBiasR2; // output Bias for RGB to YUV, element of row 2, unsigned number - uint8_t cscInputBiasR0; // input Bias for YUV to RGB, element of row 0, unsigned number - uint8_t cscInputBiasR1; // input Bias for YUV to RGB, element of row 1, unsigned number - uint8_t cscInputBiasR2; // input Bias for YUV to RGB, element of row 2, unsigned number - uint8_t reserve3[2]; - int8_t reserve4[16]; // 32B assign, for ub copy - - kAippDynamicBatchPara aippBatchPara; // allow transfer several batch para. 
-} kAippDynamicPara; - -#endif // INC_COMMON_DYNAMIC_AIPP_H_ diff --git a/inc/common/npu_error_define.h b/inc/common/npu_error_define.h deleted file mode 100644 index a4515cf6..00000000 --- a/inc/common/npu_error_define.h +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_COMMON_NPU_ERROR_DEFINE_H_ -#define INC_COMMON_NPU_ERROR_DEFINE_H_ - -typedef enum tagHiAiNpuLocal { - HIAI_HOST = 1, - HIAI_DEVICE = 2, -} HiAiNpuLocal; - -typedef enum tagHiAiNpuCodeType { - ERROR_CODE = 1, - EXCEPTION_CODE = 2, -} HiAiNpuCodeType; - -typedef enum tagHiAiNpuErrLevel { - NONE_LEVEL = 0, - SUGGESTION_LEVEL = 1, - NORMAL_LEVEL = 2, - SERIOUS_LEVEL = 3, - CRITICAL_ERROR = 4, -} HiAiNpuErrLevel; - -typedef enum tagHiAiNpuModuleId { - HIAI_DRIVER = 1, - HIAI_CTRLCPU = 2, - HIAI_TS = 3, - HIAI_RUNTIME = 4, - HIAI_AICPU = 5, - HIAI_CCE = 6, - HIAI_TVM = 7, - HIAI_FRAMEWORK = 8, - HiAI_ENGINE = 9, - HIAI_DVPP = 10, - HIAI_AIPP = 11, - HIAI_LOWPOWER = 12, - HIAI_MDC = 13, - HIAI_COMPILE = 14, - HIAI_TOOLCHIAN = 15, - HIAI_ALG = 16, - HIAI_PROFILING = 17, - HIAI_HCCL = 18, - HIAI_SIMULATION = 19, - HIAI_BIOS = 20, - HIAI_SEC = 21, - HIAI_TINY = 22, - HIAI_DP = 23, -} HiAiNpuModuleId; - -/* bit 31-bit30 to be hiai local */ -#define HIAI_NPULOCAL_MASK 0xC0000000 -#define SHIFT_LOCAL_MASK 30 -#define HIAI_NPULOCAL_VAL_MASK 0x3 -/* bit 29 -bit28 to be hiai aicpu code type */ 
-#define HIAI_CODE_TYPE_MASK 0x30000000 -#define SHIFT_CODE_MASK 28 -#define HIAI_CODE_TYPE_VAL_MASK 0x3 -/* bit 27 -bit25 to be hiai error level */ -#define HIAI_ERROR_LEVEL_MASK 0x0E000000 -#define SHIFT_ERROR_LVL_MASK 25 -#define HIAI_ERROR_LEVEL_VAL_MASK 0x7 -/* bit 24 -bit17 to be hiai mod */ -#define HIAI_MODE_ID_MASK 0x01FE0000 -#define SHIFT_MODE_MASK 17 -#define HIAI_MODE_ID_VAL_MASK 0xFF - -#define HIAI_NPU_LOC_BIT(a) \ - (HIAI_NPULOCAL_MASK & ((unsigned int)((HiAiNpuLocal)(a)) & HIAI_NPULOCAL_VAL_MASK) << SHIFT_LOCAL_MASK) -#define HIAI_NPU_CODE_TYPE_BIT(a) \ - (HIAI_CODE_TYPE_MASK & ((unsigned int)((HiAiNpuCodeType)(a)) & HIAI_CODE_TYPE_VAL_MASK) << SHIFT_CODE_MASK) -#define HIAI_NPU_ERR_LEV_BIT(a) \ - (HIAI_ERROR_LEVEL_MASK & ((unsigned int)((HiAiNpuErrLevel)(a)) & HIAI_ERROR_LEVEL_VAL_MASK) << SHIFT_ERROR_LVL_MASK) -#define HIAI_NPU_MOD_ID_BIT(a) \ - (HIAI_MODE_ID_MASK & ((unsigned int)((HiAiNpuModuleId)(a)) & HIAI_MODE_ID_VAL_MASK) << SHIFT_MODE_MASK) - -#define HIAI_NPU_ERR_CODE_HEAD(npuLocal, codeType, errLevel, moduleId) \ - (HIAI_NPU_LOC_BIT(npuLocal) + HIAI_NPU_CODE_TYPE_BIT(codeType) + HIAI_NPU_ERR_LEV_BIT(errLevel) + \ - HIAI_NPU_MOD_ID_BIT(moduleId)) - -#endif // INC_COMMON_NPU_ERROR_DEFINE_H_ diff --git a/inc/common/opskernel/ge_task_info.h b/inc/common/opskernel/ge_task_info.h deleted file mode 100644 index 9f3c409d..00000000 --- a/inc/common/opskernel/ge_task_info.h +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ -#define INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ - -#include -#include -#include -#include - -using std::string; -namespace ge { -// when need to eliminate GETaskKernelHcclInfo, so not need DAVINCI_TRAIN/DAVINCI_CLOUD -struct GETaskKernelHcclInfo { - string input_name; - string hccl_type; - void *inputDataAddr; - void *outputDataAddr; - void *workSpaceAddr; - int32_t count; - int32_t dataType; - int32_t opType; - int64_t rootId; - uint64_t workSpaceMemSize; - std::vector dims; - std::vector hcclStreamList; -}; - -struct GETaskInfo { - uint32_t id; - uint16_t type; - uint32_t streamID; - void *stream; // rtKernelLaunch input argument - void *event; - void *privateDef; - uint32_t privateDefLen; - void *opsKernelStorePtr; - - std::vector kernelHcclInfo; -}; - -struct HcomOpertion { - std::string hcclType; - void *inputPtr; - void *outputPtr; - uint64_t count; - int32_t dataType; - int32_t opType; - int32_t root; -}; - -struct HcomRemoteAccessAddrInfo { - uint32_t remotetRankID; - uint64_t remoteAddr; // host embedding table address - uint64_t localAddr; // device HBM address - uint64_t length; // memory Length in Bytes -}; - -} // namespace ge -#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ diff --git a/inc/common/opskernel/ops_kernel_info_store.h b/inc/common/opskernel/ops_kernel_info_store.h deleted file mode 100644 index ce1464d4..00000000 --- a/inc/common/opskernel/ops_kernel_info_store.h +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_ -#define INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_ - -#include -#include -#include -#include -#include "./ge_task_info.h" -#include "./ops_kernel_info_types.h" -#include "cce/aicpu_engine_struct.h" -#include "cce/fwk_adpt_struct.h" -#include "common/ge_inner_error_codes.h" -#include "graph/node.h" -#include "proto/task.pb.h" -using std::map; -using std::string; -using std::to_string; -using std::vector; - -namespace ge { -class OpDesc; - -class OpsKernelInfoStore { - public: - OpsKernelInfoStore() {} - - virtual ~OpsKernelInfoStore() {} - - // initialize opsKernelInfoStore - virtual Status Initialize(const map &options) = 0; /*lint -e148*/ - - // close opsKernelInfoStore - virtual Status Finalize() = 0; /*lint -e148*/ - - virtual Status CreateSession(const std::map &session_options) { return SUCCESS; } - - virtual Status DestroySession(const std::map &session_options) { return SUCCESS; } - - // get all opsKernelInfo - virtual void GetAllOpsKernelInfo(map &infos) const = 0; - - // whether the opsKernelInfoStore is supported based on the operator attribute - virtual bool CheckSupported(const OpDescPtr &opDescPtr, std::string &un_supported_reason) const = 0; - - virtual bool CheckAccuracySupported(const OpDescPtr &opDescPtr, std::string &un_supported_reason, - bool realQuery = false) const { - return CheckSupported(opDescPtr, un_supported_reason); - } - // opsFlag opsFlag[0] indicates constant folding is supported or not - virtual void opsFlagCheck(const ge::Node &node, std::string 
&opsFlag){}; - - // memory allocation requirement - virtual Status CalcOpRunningParam(Node &node) = 0; /*lint -e148*/ - - // generate task for op。 - virtual Status GenerateTask(const Node &node, RunContext &context, - std::vector &tasks) = 0; /*lint -e148*/ - - // only call fe engine interface to compile single op - virtual Status CompileOp(vector &node_vec) { return SUCCESS; } - virtual Status CompileOpRun(vector &node_vec) { return SUCCESS; } - // load task for op - virtual Status LoadTask(GETaskInfo &task) { return SUCCESS; } - - // only call aicpu interface to generate task struct - virtual Status GenSingleOpRunTask(const NodePtr &node, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; } - - // only call aicpu interface to generate task struct - virtual Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; } -}; -} // namespace ge -#endif // INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_ diff --git a/inc/common/opskernel/ops_kernel_info_types.h b/inc/common/opskernel/ops_kernel_info_types.h deleted file mode 100644 index 684c1abc..00000000 --- a/inc/common/opskernel/ops_kernel_info_types.h +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_ -#define INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_ - -#include -#include -#include -#include "graph/buffer.h" -#include "runtime/rt_model.h" - -using std::string; - -namespace ge { -/*lint -e148*/ -struct RunContext { - rtModel_t model; - rtStream_t stream; - uint64_t sessionId; - uint64_t dataMemSize; - uint8_t *dataMemBase; - uint64_t weightMemSize; - uint8_t *weightMemBase; - ge::Buffer weightsBuffer; - std::vector graphStreamList; // all streams of graph, order by ge stream id(0,1,...) - std::vector graphEventList; // all events of graph, order by ge event id(0,1,...) - std::vector graphLabelList; // all labels of graph, order by ge label id(0,1,...) -}; - -/*lint +e148*/ - -struct Task { - uint32_t id; - uint16_t type; - void *stream; - void *event; -}; - -struct OpInfo { - string engine; // which engin - /*lint -e148*/ - string opKernelLib; // which opsKernelStore - int computeCost; // compute cost - bool flagPartial; // whether to support is related to shape - bool flagAsync; // Whether to support asynchronous - bool isAtomic; // whether to support atomic addr clean - string opFileName; // op file name - string opFuncName; // op function name -}; -} // namespace ge - -#endif // INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_ diff --git a/inc/common/optimizer/graph_optimizer.h b/inc/common/optimizer/graph_optimizer.h deleted file mode 100644 index 253aaae1..00000000 --- a/inc/common/optimizer/graph_optimizer.h +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_ -#define INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_ - -#include -#include -#include "./graph_optimizer_types.h" -#include "common/ge_inner_error_codes.h" -#include "common/opskernel/ops_kernel_info_types.h" -#include "graph/compute_graph.h" - -using std::map; -using std::string; - -/*lint -e148*/ -namespace ge { -class GraphOptimizer { - public: - virtual ~GraphOptimizer() {} - - // initialize graphOptimizer - virtual Status Initialize(const map &options) = 0; - - // close graphOptimizer - virtual Status Finalize() = 0; - - // optimize original graph for FE quant optimize - virtual Status OptimizeGraphPrepare(ComputeGraph &graph) { return SUCCESS; } - - // optimize graph before build for RTS - virtual Status OptimizeGraphBeforeBuild(ComputeGraph &graph) { return SUCCESS; } - - // optimize original graph, using in graph preparation stage - virtual Status OptimizeOriginalGraph(ComputeGraph &graph) = 0; - - // optimize original graph, using for conversion operator insert in graph preparation stage - virtual Status OptimizeOriginalGraphJudgeInsert(ComputeGraph &graph) { return SUCCESS; } - - // optimize fused graph - virtual Status OptimizeFusedGraph(ComputeGraph &graph) = 0; - - // optimize whole graph, using after graph merged stage - virtual Status OptimizeWholeGraph(ComputeGraph &graph) = 0; - - // get attribute of graph optimizer - virtual Status GetAttributes(GraphOptimizerAttribute &attrs) const = 0; - - // optimize streamed Graph - virtual Status OptimizeStreamGraph(ComputeGraph &graph, const 
RunContext &context) { return SUCCESS; } - - // op compile - virtual Status OptimizeFusedGraphAfterGraphSlice(ComputeGraph &graph) { return SUCCESS; } -}; -} // namespace ge -/*lint +e148*/ -#endif // INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_ diff --git a/inc/common/util/ai_core/common/aicore_util_attr_define.h b/inc/common/util/ai_core/common/aicore_util_attr_define.h deleted file mode 100644 index ba28d7b3..00000000 --- a/inc/common/util/ai_core/common/aicore_util_attr_define.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ -#define INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_ - -#include - -namespace fe { -static const std::string SCOPE_ID_ATTR = "fusion_scope"; - -static const std::string FE_IMPLY_TYPE = "_fe_imply_type"; - -static const std::string PARENT_OP_TYPE = "parentOpType"; - -static const std::string ATTR_NAME_TASK_L2_FUSION_INFO_EXTEND_PTR = "task_l2_fusion_info_extend_content"; - -static const std::string ATTR_DATA_DUMP_REF = "_datadump_ref"; - -static const std::string ATTR_NAME_L2_FUSION_EXTEND_PTR = "l2_fusion_extend_content"; - -static const std::string L1_OPTIMIZED = "l1_optimized"; - -static const std::string L2_OPTIMIZED = "l2_optimized"; - -static const std::string OP_SLICE_INFO = "_op_slice_info"; -} // namespace fe -#endif diff --git a/inc/common/util/ai_core/common/aicore_util_types.h b/inc/common/util/ai_core/common/aicore_util_types.h deleted file mode 100644 index b2615dc9..00000000 --- a/inc/common/util/ai_core/common/aicore_util_types.h +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ -#define INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_ - -#include "graph/anchor.h" -#include "graph/types.h" -#include "runtime/kernel.h" -#include -#include -#include - -namespace fe { -struct FusionOpSrc { - uint32_t src_op_id; - ge::AnchorPtr src_anchor; - int32_t fusion_src_index; - int32_t fusion_dst_index; -}; - -struct FusionOpDst { - uint32_t dst_op_id; - ge::AnchorPtr dst_anchor; -}; - -struct FusionDataFlow { - std::pair edge; - std::pair node_dataindex_pair; -}; - -typedef struct tagL2FusionData { - uint32_t l2Index; - uint64_t l2Addr; - uint64_t l2PageNum; -} L2FusionData_t; -typedef std::map L2FusionDataMap_t; - -typedef struct tagFeSmDesc { - rtL2Ctrl_t l2ctrl; - std::string nodeName[8]; - uint8_t outputIndex[8]; -} feSmDesc_t; - -typedef struct TagTaskL2FusionInfo { - std::string nodeName; - feSmDesc_t l2Info; - L2FusionDataMap_t input; - L2FusionDataMap_t output; - uint32_t isUsed; -} TaskL2FusionInfo_t; - -using L2FusionInfoPtr = std::shared_ptr; - -typedef struct ToOpStruct { - int64_t opL1Space = 0; - std::vector opL1FusionType; - int64_t opL1WorkspaceFlag = 0; // for workspace flag - int64_t opL1WorkspaceSize = 0; - std::vector> validInputShape; - std::vector> validOutputShape; - std::vector> sliceInputOffset; // conv & pooling & ReadSelect - std::vector> sliceOutputOffset; // WriteSelect - std::vector totalShape; - uint32_t splitIndex = 0; - ToOpStruct() { - // set invalid value for essential variable - opL1Space = -1; - opL1WorkspaceSize = -1; - } -} ToOpStruct_t; - -enum OpImplType { - EN_IMPL_CUSTOM_CONSTANT_CCE = 0, // custom constant op - EN_IMPL_CUSTOM_TIK, // custom tik op - EN_IMPL_CUSTOM_TBE, // custom tbe op - EN_IMPL_HW_CONSTANT_CCE, // Huawei built-in constant op - EN_IMPL_HW_GENERAL_CCE, // Huawei built-in cce op - EN_IMPL_HW_TIK, // Huawei built-in tik op - EN_IMPL_HW_TBE, // Huawei built-in tbe op - EN_IMPL_RL, // RL op - EN_IMPL_PLUGIN_TBE, // Huawei built-in tbe plugin op - 
EN_IMPL_VECTOR_CORE_HW_TBE, // Huawei built-in tbe op - EN_IMPL_VECTOR_CORE_CUSTOM_TBE, // custom tbe op - EN_IMPL_NON_PERSISTENT_CUSTOM_TBE, // custom tbe op - EN_RESERVED // reserved value -}; - -static const std::map DATATYPE_SIZE_MAP{{ge::DT_FLOAT, sizeof(float)}, - {ge::DT_FLOAT16, sizeof(int16_t)}, - {ge::DT_INT8, sizeof(int8_t)}, - {ge::DT_INT32, sizeof(int32_t)}, - {ge::DT_UINT8, sizeof(uint8_t)}, - {ge::DT_UINT32, sizeof(uint32_t)}, - {ge::DT_INT16, sizeof(int16_t)}, - {ge::DT_UINT16, sizeof(uint16_t)}, - {ge::DT_INT64, sizeof(int64_t)}, - {ge::DT_UINT64, sizeof(uint64_t)}, - {ge::DT_DOUBLE, sizeof(double)}, - {ge::DT_BOOL, sizeof(bool)}, - {ge::DT_DUAL, sizeof(float) + sizeof(int8_t)}, - {ge::DT_DUAL_SUB_UINT8, sizeof(int8_t)}, - {ge::DT_DUAL_SUB_INT8, sizeof(int8_t)}}; -} // namespace fe -#endif diff --git a/inc/common/util/ai_core/common/graph_comm.h b/inc/common/util/ai_core/common/graph_comm.h deleted file mode 100644 index d672e056..00000000 --- a/inc/common/util/ai_core/common/graph_comm.h +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ -#define INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_ - -#include "graph/compute_graph.h" -#include "common/aicore_util_types.h" -#include "register/graph_optimizer/graph_optimize_register_error_codes.h" - -#include -#include -#include -#include - -namespace fe { - -using kScopeNodeMap_t = std::map>; -using kScopeNodePair_t = std::pair>; - -class GraphCommImpl; -using GraphCommImplPtr = std::unique_ptr; - -class GraphComm { - public: - GraphComm(const string &engineName); - virtual ~GraphComm(); - GraphComm(const GraphComm &in) = delete; - GraphComm &operator=(const GraphComm &in) = delete; - - Status GetscopeNodeMap(ge::ComputeGraph &graph, kScopeNodeMap_t &fusionMap); - - Status CopyFusionOpNodes(vector &fusInputEdgeList, vector &fusOutputEdgeList, - vector &fusNodelist, ge::OpDescPtr fusionOpDesc, - ge::ComputeGraphPtr fusionGraph); - - Status CopyFusionOpEdges(ge::OpDescPtr fusionOpDesc, ge::ComputeGraph &origGraph, ge::ComputeGraphPtr fusionGraph); - - Status GetNodeDataFlowMap(const ge::NodePtr &fusNode, - std::map> &fusionOpAnchorsMap, - ge::kFusionDataFlowVec_t &fusDataflowList, const int &mapType); - - Status GetFusionNodeEdgeList(std::vector &fusNodelist, std::vector &fusInputEdgeList, - std::vector &fusOutputEdgeList); - void ClearFusionSrc(); - - void ClearFusionDst(); - - void AddFusionOutputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_src_index, - std::pair &node_dataindex_pair); - - void AddFusionInputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_dst_index, - std::pair &node_dataindex_pair); - - void SaveFusionDst(const uint32_t &dst_op_id, ge::AnchorPtr dst_anchor); - - bool IsFusionDstExist(const uint32_t &dst_op_id, const ge::AnchorPtr &dst_anchor); - - bool GetFusionSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, int32_t &fusion_src_index, - int32_t &fusion_dst_index); - - Status 
GetFusionNodeCtrlEdgeList(vector &fusNodelist, vector &fusInputCtrlEdgeList, - vector &fusOutputCtrlEdgeList); - - Status MergeFusionNodeEdgeList(ge::NodePtr &fusNode, vector &fusNodelist, - vector &fusInputEdgeList, vector &fusOutputEdgeList); - - Status MergeFusionNodeCtrlEdgeList(ge::NodePtr &fusNode, vector &fusNodelist, - vector &fusInputEdgeList, - vector &fusOutputEdgeList); - - string GetEngineName(); - - private: - Status MergeFusionNodeInputEdgeList(ge::NodePtr fusNode, std::vector &fusNodelist, - std::vector &fusInputEdgeList); - Status MergeFusionNodeOutputEdgeList(ge::NodePtr fusNode, std::vector &fusNodelist, - std::vector &fusOutputEdgeList); - - string engineName_; - - std::vector exist_fusion_src_list_; - std::vector exist_fusion_dst_list_; - - // std::vector> - ge::kFusionDataFlowVec_t fusion_input_dataflow_list_; - - // std::vector> - ge::kFusionDataFlowVec_t fusion_output_dataflow_list_; - - GraphCommImplPtr graphCommImplPtr_; -}; -} // namespace fe -#endif diff --git a/inc/common/util/ai_core/common/scope_allocator.h b/inc/common/util/ai_core/common/scope_allocator.h deleted file mode 100644 index 6cebb286..00000000 --- a/inc/common/util/ai_core/common/scope_allocator.h +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ -#define INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_ - -#include "graph/op_desc.h" - -namespace fe { -class ScopeAllocator { - public: - ScopeAllocator(); - virtual ~ScopeAllocator(); - ScopeAllocator(const ScopeAllocator& in) = delete; - ScopeAllocator& operator=(const ScopeAllocator& in) = delete; - - public: - void Init(); - int64_t GetCurrentScopeId(); - int64_t AllocateScopeId(void); - bool HasScopeAttr(ge::ConstOpDescPtr opdef); - bool GetScopeAttr(ge::ConstOpDescPtr opdef, int64_t& scopeId); - bool SetScopeAttr(ge::OpDescPtr opdef, int64_t scopeId); - bool ResetScopeId(int64_t scopeId); - - private: - int64_t scopeId; -}; -} // namespace fe -#endif diff --git a/inc/common/util/ai_core/param_calculate/tensorsize_calculator.h b/inc/common/util/ai_core/param_calculate/tensorsize_calculator.h deleted file mode 100644 index c82cca4b..00000000 --- a/inc/common/util/ai_core/param_calculate/tensorsize_calculator.h +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef TENSORSIZE_CALCULATOR_H -#define TENSORSIZE_CALCULATOR_H - -#include "graph_optimizer/graph_optimize_register_error_codes.h" - -#include -#include -#include "graph/compute_graph.h" -#include "graph/op_desc.h" - -namespace fe { -class TensorSizeCalculator { - public: - /** - * Calculate the tensor size of input and output of each opdesc - * @param opDesc opdesc object - * @param opImplType op impl type - * @return status SUCCESS or FAILED - */ - static Status CalculateOpTensorSize(ge::OpDesc &opDesc); - - private: - static Status CalcInputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag); - - static Status CalcOutputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag); -}; -} // namespace fe - -#endif // TENSORSIZE_CALCULATOR_H diff --git a/inc/common/util/compress/compress.h b/inc/common/util/compress/compress.h deleted file mode 100644 index e350f9e5..00000000 --- a/inc/common/util/compress/compress.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef COMPRESS_H -#define COMPRESS_H - -#include - -enum CmpStatus { RET_SUCCESS = 0, RET_ERROR = -1 }; - -struct CompressConfig { - size_t inputSize; // length of data to compress - size_t engineNum; // how many decompress engines - size_t maxRatio; // how much size of a basic compression block, only 64 supported now (8x: 64 4x: 32) - size_t channel; // channels of L2 or DDR. 
For load balance - size_t fractalSize; // size of compressing block - bool isTight; // whether compose compressed data tightly - size_t init_offset; -}; - -CmpStatus CompressWeights(char* input, const CompressConfig& compressConfig, char* indexs, char* output, - size_t& compressedLength); - -#endif // COMPRESS_H diff --git a/inc/common/util/error_manager/error_manager.h b/inc/common/util/error_manager/error_manager.h deleted file mode 100644 index 438e68a7..00000000 --- a/inc/common/util/error_manager/error_manager.h +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ERROR_MANAGER_H_ -#define ERROR_MANAGER_H_ - -#include -#include -#include - -class ErrorManager { - public: - /// - /// @brief Obtain ErrorManager instance - /// @return ErrorManager instance - /// - static ErrorManager &GetInstance(); - - /// - /// @brief init - /// @param [in] path: current so path - /// @return int 0(success) -1(fail) - /// - int Init(std::string path); - - /// - /// @brief Report error message - /// @param [in] error_code: error code - /// @param [in] args_map: parameter map - /// @return int 0(success) -1(fail) - /// - int ReportErrMessage(std::string error_code, const std::map &args_map); - - /// - /// @brief output error message - /// @param [in] handle: print handle - /// @return int 0(success) -1(fail) - /// - int OutputErrMessage(int handle); - - /// - /// @brief output message - /// @param [in] handle: print handle - /// @return int 0(success) -1(fail) - /// - int OutputMessage(int handle); - - /// - /// @brief Report error message - /// @param [in] key: vector parameter key - /// @param [in] value: vector parameter value - /// - void ATCReportErrMessage(std::string error_code, const std::vector &key = {}, - const std::vector &value = {}); - - private: - struct ErrorInfo { - std::string error_id; - std::string error_message; - std::vector arg_list; - }; - - ErrorManager() {} - ~ErrorManager() {} - - ErrorManager(const ErrorManager &) = delete; - ErrorManager(ErrorManager &&) = delete; - ErrorManager &operator=(const ErrorManager &) = delete; - ErrorManager &operator=(ErrorManager &&) = delete; - - int ParseJsonFile(std::string path); - - int ReadJsonFile(const std::string &file_path, void *handle); - - bool is_init_ = false; - std::map error_map_; - std::vector error_messages_; - std::vector warning_messages_; -}; - -#endif // ERROR_MANAGER_H_ diff --git a/inc/common/util/platform_info.h b/inc/common/util/platform_info.h deleted file mode 100644 index 8d2a0579..00000000 --- a/inc/common/util/platform_info.h +++ /dev/null 
@@ -1,101 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PLATFORM_INFO_H -#define PLATFORM_INFO_H - -#include -#include -#include -#include "platform_info_def.h" - -using std::map; -using std::string; -using std::vector; - -namespace fe { -class PlatformInfoManager { - public: - PlatformInfoManager(const PlatformInfoManager &) = delete; - PlatformInfoManager &operator=(const PlatformInfoManager &) = delete; - - static PlatformInfoManager &Instance(); - uint32_t InitializePlatformInfo(); - uint32_t Finalize(); - - uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo); - - uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo); - - void SetOptionalCompilationInfo(OptionalInfo &optiCompilationInfo); - - private: - PlatformInfoManager(); - ~PlatformInfoManager(); - - uint32_t LoadIniFile(string iniFileRealPath); - - void Trim(string &str); - - uint32_t LoadConfigFile(string realPath); - - string RealPath(const std::string &path); - - string GetSoFilePath(); - - void ParseVersion(map &versionMap, string &socVersion, PlatformInfo &platformInfoTemp); - - void ParseSocInfo(map &socInfoMap, PlatformInfo &platformInfoTemp); - - void ParseCubeOfAICoreSpec(map &aiCoreSpecMap, PlatformInfo &platformInfoTemp); - - void ParseBufferOfAICoreSpec(map &aiCoreSpecMap, PlatformInfo 
&platformInfoTemp); - - void ParseUBOfAICoreSpec(map &aiCoreSpecMap, PlatformInfo &platformInfoTemp); - - void ParseUnzipOfAICoreSpec(map &aiCoreSpecMap, PlatformInfo &platformInfoTemp); - - void ParseAICoreSpec(map &aiCoreSpecMap, PlatformInfo &platformInfoTemp); - - void ParseBufferOfAICoreMemoryRates(map &aiCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); - - void ParseAICoreMemoryRates(map &aiCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); - - void ParseUBOfAICoreMemoryRates(map &aiCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); - - void ParseAICoreintrinsicDtypeMap(map &aiCoreintrinsicDtypeMap, PlatformInfo &platformInfoTemp); - - void ParseVectorCoreSpec(map &vectorCoreSpecMap, PlatformInfo &platformInfoTemp); - - void ParseVectorCoreMemoryRates(map &vectorCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); - - void ParseCPUCache(map &CPUCacheMap, PlatformInfo &platformInfoTemp); - - void ParseVectorCoreintrinsicDtypeMap(map &vectorCoreintrinsicDtypeMap, - PlatformInfo &platformInfoTemp); - - uint32_t ParsePlatformInfoFromStrToStruct(map> &contentInfoMap, string &socVersion, - PlatformInfo &platformInfoTemp); - - uint32_t AssemblePlatformInfoVector(map> &contentInfoMap); - - private: - bool initFlag_; - map platformInfoMap_; - OptionalInfo optiCompilationInfo_; -}; -} // namespace fe -#endif diff --git a/inc/common/util/platform_info_def.h b/inc/common/util/platform_info_def.h deleted file mode 100644 index c660e8f1..00000000 --- a/inc/common/util/platform_info_def.h +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PLATFORM_INFO_DEF_H -#define PLATFORM_INFO_DEF_H - -#include -#include -#include - -using std::map; -using std::string; -using std::vector; - -namespace fe { -enum MemoryType { DDR = 0, HBM }; - -enum L2Type { Cache = 0, Buff }; - -typedef struct tagStrInfo { - string aicVersion; - string ccecAICVersion; - string ccecAIVVersion; - string isSupportAIcpuCompiler; -} StrInfo; - -typedef struct tagSoCInfo { - uint32_t aiCoreCnt; - uint32_t vectorCoreCnt; - uint32_t aiCpuCnt; - MemoryType memoryType; - uint64_t memorySize; - L2Type l2Type; - uint64_t l2Size; - uint32_t l2PageNum; -} SoCInfo; - -typedef struct tagAiCoreSpec { - double cubeFreq; - uint64_t cubeMSize; - uint64_t cubeNSize; - uint64_t cubeKSize; - uint64_t vecCalcSize; - uint64_t l0ASize; - uint64_t l0BSize; - uint64_t l0CSize; - uint64_t l1Size; - uint64_t smaskBuffer; - uint64_t ubSize; - uint64_t ubblockSize; - uint64_t ubbankSize; - uint64_t ubbankNum; - uint64_t ubburstInOneBlock; - uint64_t ubbankGroupNum; - uint32_t unzipEngines; - uint32_t unzipMaxRatios; - uint32_t unzipChannels; - uint8_t unzipIsTight; -} AiCoreSpec; - -typedef struct tagAiCoreMemoryRates { - double ddrRate; - double ddrReadRate; - double ddrWriteRate; - double l2Rate; - double l2ReadRate; - double l2WriteRate; - double l1ToL0ARate; - double l1ToL0BRate; - double l1ToUBRate; - double l0CToUBRate; - double ubToL2Rate; - double ubToDdrRate; - double ubToL1Rate; -} AiCoreMemoryRates; - -typedef struct tagVectorCoreSpec { - double vecFreq; - uint64_t vecCalcSize; - uint64_t smaskBuffer; - uint64_t 
ubSize; - uint64_t ubblockSize; - uint64_t ubbankSize; - uint64_t ubbankNum; - uint64_t ubburstInOneBlock; - uint64_t ubbankGroupNum; - uint64_t vectorRegSize; - uint64_t predicateRegSize; - uint64_t addressRegSize; -} VectorCoreSpec; - -typedef struct tagVectorCoreMemoryRates { - double ddrRate; - double ddrReadRate; - double ddrWriteRate; - double l2Rate; - double l2ReadRate; - double l2WriteRate; - double ubToL2Rate; - double ubToDdrRate; -} VectorCoreMemoryRates; - -typedef struct tagCPUCache { - uint32_t AICPUSyncBySW; - uint32_t TSCPUSyncBySW; -} CPUCache; - -typedef struct tagPlatformInfo { - StrInfo strInfo; - SoCInfo socInfo; - AiCoreSpec aiCoreSpec; - AiCoreMemoryRates aiCoreMemoryRates; - map> aiCoreIntrinsicDtypeMap; - VectorCoreSpec vectorCoreSpec; - VectorCoreMemoryRates vectorCoreMemoryRates; - CPUCache cpucache; - map> vectorCoreIntrinsicDtypeMap; -} PlatformInfo; - -typedef struct tagOptionalInfo { - string socVersion; - string coreType; - uint32_t aiCoreNum; - string l1FusionFlag; -} OptionalInfo; -} // namespace fe -#endif diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index 7b045d54..e7f52724 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -70,7 +70,7 @@ using Status = uint32_t; // General error code GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success"); -GE_ERRORNO(0b11, 0b11, 0b111, 0xFF, 0b11111, FAILED, 0xFFF, "failed"); /*lint !e401*/ +GE_ERRORNO(0b11, 0b11, 0b111, 0xFF, 0b11111, FAILED, 0xFFF, "failed"); } // namespace ge #endif // INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_ diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 68743bc8..1c6b7a3e 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -238,10 +238,10 @@ enum GraphRunMode { PREDICTION = 0, TRAIN }; // Input/Output tensor info struct InputTensorInfo { - uint32_t data_type; // data type - std::vector dims; // shape description - void *data; 
// tensor data - int64_t length; // tensor length + uint32_t data_type; // data type + std::vector dims; // shape description + void *data; // tensor data + int64_t length; // tensor length }; struct OutputTensorInfo { @@ -250,8 +250,11 @@ struct OutputTensorInfo { std::unique_ptr data; // tensor data int64_t length; // tensor length OutputTensorInfo() : data_type(0), dims({}), data(nullptr), length(0) {} - OutputTensorInfo(OutputTensorInfo &&out) - : data_type(out.data_type), dims(out.dims), data(std::move(out.data)), length(out.length) {} + OutputTensorInfo(OutputTensorInfo &&out) : + data_type(out.data_type), + dims(out.dims), + data(std::move(out.data)), + length(out.length) {} OutputTensorInfo &operator=(OutputTensorInfo &&out) { if (this != &out) { @@ -270,55 +273,67 @@ using Status = uint32_t; using RunAsyncCallback = std::function &)>; // for ir build namespace ir_option { -static const char *const INPUT_FORMAT = "input_format"; -static const char *const INPUT_SHAPE = "input_shape"; -static const char *const OP_NAME_MAP = "op_name_map"; -static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; -static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; -static const char *const DYNAMIC_DIMS = kDynamicDims; -static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); -static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); -static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; -static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); -static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); -static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); -static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; -static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); -static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); -static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); -static const char 
*const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); -static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); -static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); -static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); -static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; -static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); -static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); -static const char *const LOG_LEVEL = "log"; -static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); - -// for interface: aclgrphBuildModel -const std::set ir_builder_suppported_options = { - INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, - DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, - INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, - AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, - INPUT_FP16_NODES, LOG_LEVEL}; -// for interface: aclgrphBuildInitialize -const std::set global_options = {CORE_TYPE, - SOC_VERSION, - BUFFER_OPTIMIZE, - ENABLE_COMPRESS_WEIGHT, - COMPRESS_WEIGHT_CONF, - PRECISION_MODE, - EXEC_DISABLE_REUSED_MEMORY, - AUTO_TUNE_MODE, - ENABLE_SINGLE_STREAM, - AICORE_NUM, - FUSION_SWITCH_FILE, - ENABLE_SMALL_CHANNEL, - OP_SELECT_IMPL_MODE, - OPTYPELIST_FOR_IMPLMODE}; -} // namespace ir_option + static const char *const INPUT_FORMAT = "input_format"; + static const char *const INPUT_SHAPE = "input_shape"; + static const char *const OP_NAME_MAP = "op_name_map"; + static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; + static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; + static const char *const DYNAMIC_DIMS = kDynamicDims; + static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); + static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); + static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; + static const char *const 
AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); + static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); + static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); + static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; + static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); + static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); + static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); + static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); + static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); + static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); + static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); + static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; + static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); + static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); + static const char *const LOG_LEVEL = "log"; + static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); + + // for interface: aclgrphBuildModel + const std::set ir_builder_suppported_options = { + INPUT_FORMAT, + INPUT_SHAPE, + OP_NAME_MAP, + DYNAMIC_BATCH_SIZE, + DYNAMIC_IMAGE_SIZE, + DYNAMIC_DIMS, + INSERT_OP_FILE, + PRECISION_MODE, + EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, + OUTPUT_TYPE, + OUT_NODES, + INPUT_FP16_NODES, + LOG_LEVEL + }; + // for interface: aclgrphBuildInitialize + const std::set global_options = { + CORE_TYPE, + SOC_VERSION, + BUFFER_OPTIMIZE, + ENABLE_COMPRESS_WEIGHT, + COMPRESS_WEIGHT_CONF, + PRECISION_MODE, + EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, + ENABLE_SINGLE_STREAM, + AICORE_NUM, + FUSION_SWITCH_FILE, + ENABLE_SMALL_CHANNEL, + OP_SELECT_IMPL_MODE, + OPTYPELIST_FOR_IMPLMODE + }; +} } // namespace ge #endif // INC_EXTERNAL_GE_GE_API_TYPES_H_ diff --git a/inc/external/ge/ge_ir_build.h 
b/inc/external/ge/ge_ir_build.h index acf6991a..cbe7f6d3 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -27,11 +27,12 @@ namespace { #define IR_MAJOR_VERSION (int(1)) #define IR_MINOR_VERSION (int(0)) #define IR_PATCH_VERSION (int(0)) -} // namespace +} -namespace ge { +namespace ge{ -struct ModelBufferData { +struct ModelBufferData +{ std::shared_ptr data = nullptr; uint64_t length; }; @@ -63,8 +64,7 @@ void aclgrphBuildFinalize(); * @retval GRAPH_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, - ModelBufferData &model); +graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, ModelBufferData& model); /** * @ingroup AscendCL @@ -75,7 +75,7 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map +#include #if defined(BUILD_VERSION_PERF) #define DOMI_LOGE(fmt, ...) #else @@ -83,12 +83,12 @@ } while (0); // If expr is not GRAPH_SUCCESS, print the log and return FAILED -#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ - do { \ - if ((expr) != ge::GRAPH_SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - return FAILED; \ - } \ +#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ + do { \ + if ((expr) != ge::GRAPH_SUCCESS) { \ + DOMI_LOGE(__VA_ARGS__); \ + return FAILED; \ + } \ } while (0); // If expr is not SUCCESS, print the log and execute a custom statement @@ -99,13 +99,13 @@ } while (0); // If expr is not true, print the log and return the specified status -#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, __VA_ARGS__); \ - return _status; \ - } \ +#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) 
\ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, __VA_ARGS__); \ + return _status; \ + } \ } while (0); // If expr is not true, print the log and return the specified status diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 3ab6cf06..79957687 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -14,7 +14,6 @@ * limitations under the License. */ -/*lint -e* */ #ifndef INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ #define INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ @@ -125,13 +124,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 // Init module error code definition -GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 -GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 -GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 -GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 -GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 +GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 +GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 +GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 +GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 +GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5, - "Profiling command mode which is preferred is running, the api mode will not 
work."); // 1343234053 + "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 // Session module error code definition GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 @@ -216,8 +215,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed."); GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338 // Optimize errocode -GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 -GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 +GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 +GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 // Ops module error code definition GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432 @@ -304,6 +303,7 @@ GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, 16, "Failed to allocate wei GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_VAR_MEM_FAILED, 17, "Failed to allocate variable memory."); GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 18, "GE AIPP is not exist."); GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 19, "GE Dynamic AIPP is not support to query temporarily."); +GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_P2P_MEM_FAILED, 20, "Failed to allocate P2P memory"); // Generator module error code definition GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 9a4fd1f9..91815fc2 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -28,9 +28,16 @@ #include "external/ge/ge_api_types.h" namespace ge { -enum RuntimeType { HOST = 0, DEVICE = 1 }; +enum RuntimeType { + HOST = 0, + DEVICE = 1 +}; -enum PerfLevel { GEN_TASK_WITH_FUSION = -1, 
GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; +enum PerfLevel { + GEN_TASK_WITH_FUSION = -1, + GEN_TASK_WITHOUT_L2FUSION = 3, + GEN_TASK_WITHOUT_FUSION = 4 +}; enum FrameworkType { CAFFE = 0, @@ -48,6 +55,13 @@ enum OpEngineType { ENGINE_AIVECTOR = 4 // not support }; +enum InputAippType{ + DATA_WITHOUT_AIPP = 0, + DATA_WITH_STATIC_AIPP, + DATA_WITH_DYNAMIC_AIPP, + DYNAMIC_AIPP_NODE +}; + const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; @@ -93,7 +107,7 @@ struct OutputData { struct Command { std::string cmd_type; // Command type std::vector cmd_params; // Command params - uint64_t module_index; // prof module + uint64_t module_index; // prof module }; // The definition of I/O shape description diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h index fbe7e73f..949d8b4c 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -32,10 +32,10 @@ class ModelHelper { ModelHelper() = default; ~ModelHelper(); - Status SaveToOmModel(const GeModelPtr& ge_model, const SaveParam& save_param, const std::string& output_file, - ge::ModelBufferData& model); + Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, + const std::string &output_file, ge::ModelBufferData &model); Status SaveOriginalGraphToOmModel(const ge::Graph& graph, const std::string& output_file); - Status LoadModel(const ge::ModelData& model_data); + Status LoadModel(const ge::ModelData &model_data); Status GetModelBufferData(ge::ModelBufferData& model); const ModelFileHeader* GetFileHeader() const { return file_header_; } @@ -44,15 +44,15 @@ class ModelHelper { void SetSaveMode(bool val) { is_offline_ = val; } bool GetSaveMode(void) const { return is_offline_; } - Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name); - Status GetModelNameFromMergedGraphName(const 
std::string& graph_name, std::string& model_name); + Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name); + Status GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name); private: bool is_assign_model_ = false; bool is_offline_ = true; ModelFileHeader* file_header_ = nullptr; // Encrypted model need delete temp model and unencrypted model need not delete model - uint8_t* model_addr_tmp_ = nullptr; + uint8_t *model_addr_tmp_ = nullptr; uint32_t model_len_tmp_ = 0; GeModelPtr model_; @@ -66,8 +66,8 @@ class ModelHelper { Status LoadTBEKernelStore(OmFileLoadHelper& om_load_helper); Status LoadCustAICPUKernelStore(OmFileLoadHelper& om_load_helper); Status ReleaseLocalModelData() noexcept; - Status SaveModelPartition(std::shared_ptr& om_file_save_helper, ModelPartitionType type, - const uint8_t* data, size_t size); + Status SaveModelPartition(std::shared_ptr& om_file_save_helper, + ModelPartitionType type, const uint8_t* data, size_t size); }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index fec7e294..4ca54b50 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -32,7 +32,7 @@ using std::vector; namespace ge { struct ModelPartition { ModelPartitionType type; - uint8_t *data = 0; + uint8_t* data = 0; uint32_t size = 0; }; @@ -81,8 +81,8 @@ class OmFileSaveHelper { const std::vector &GetModelPartitions() const; - Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, - bool is_offline = true); + Status SaveModel(const SaveParam &save_param, const char *target_file, + ge::ModelBufferData& model, bool is_offline = true); Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); diff --git a/inc/framework/common/op/attr_value_util.h 
b/inc/framework/common/op/attr_value_util.h index 8a90cfa2..e3803b78 100644 --- a/inc/framework/common/op/attr_value_util.h +++ b/inc/framework/common/op/attr_value_util.h @@ -21,7 +21,6 @@ #include #include -#include "common/types.h" #include "graph/debug/ge_attr_define.h" #include "proto/om.pb.h" @@ -156,6 +155,6 @@ bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); -} // namespace ge +} #endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 87cf54d8..4718b180 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -22,7 +22,8 @@ #include #include "common/op/attr_value_util.h" -#include "common/types.h" +#include "register/register_types.h" +#include "register/register_error_codes.h" #include "common/util.h" #include "graph/attr_value.h" #include "graph/ge_tensor.h" diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h index 2154648d..001a0e75 100644 --- a/inc/framework/common/scope_guard.h +++ b/inc/framework/common/scope_guard.h @@ -42,9 +42,8 @@ class ScopeGuard { if (on_exit_scope_ != nullptr) { try { on_exit_scope_(); - } catch (std::bad_function_call &e) { - } catch (...) { - } + } catch (std::bad_function_call &e) { } + catch (...) 
{ } } } } diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index 918a3950..b74eddcf 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -36,8 +36,8 @@ class StringUtils { #endif return s; } - // lint -esym(551,*) - static std::string &Rtrim(std::string &s) { /*lint !e618*/ + + static std::string &Rtrim(std::string &s) { #if __cplusplus >= 201103L (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); #else @@ -45,7 +45,7 @@ class StringUtils { #endif return s; } - // lint -esym(551,*) + /// /// @ingroup domi_common /// @brief delete spaces at the beginning and end of a string diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index ad284d07..91c68434 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -434,6 +434,7 @@ REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter"); REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend"); REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); @@ -565,10 +566,10 @@ enum ModelCheckType { /// @brief dynamic input type /// enum DynamicInputType { - FIXED = 0, // default mode - DYNAMIC_BATCH = 1, - DYNAMIC_IMAGE = 2, - DYNAMIC_DIMS = 3 + FIXED = 0, // default mode + DYNAMIC_BATCH = 1, + DYNAMIC_IMAGE = 2, + DYNAMIC_DIMS = 3 }; /// diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index b1c278d8..42ab3868 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -30,12 +30,12 @@ #include "framework/common/ge_inner_error_codes.h" #include "mmpa/mmpa_api.h" -#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ - do { \ - if (size <= 0) { \ - DOMI_LOGE("param[%s] is not a positive number", #size); \ - 
return PARAM_INVALID; \ - } \ +#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ + do { \ + if (size <= 0) { \ + DOMI_LOGE("param[%s] is not a positive number", #size); \ + return PARAM_INVALID; \ + } \ } while (0) #define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ @@ -113,84 +113,84 @@ } while (0) // Check if the parameter is null. If yes, return PARAM_INVALID and record the error -#define GE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the parameter is null. If yes, just return and record the error -#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ +#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return; \ + } \ } while (0) // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log -#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - exec_expr; \ - } \ +#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + exec_expr; \ + } \ } while (0) // Check whether the parameter is null. If yes, return directly and record the error log -#define GE_RT_VOID_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ +#define GE_RT_VOID_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return; \ + } \ } while (0) // Check if the parameter is null. 
If yes, return false and record the error log -#define GE_RT_FALSE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return false; \ - } \ +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return false; \ + } \ } while (0) // Check if the parameter is out of bounds -#define GE_CHECK_SIZE(size) \ - do { \ - if (size == 0) { \ - DOMI_LOGE("param[%s] is out of range", #size); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_SIZE(size) \ + do { \ + if (size == 0) { \ + DOMI_LOGE("param[%s] is out of range", #size); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the container is empty -#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ - do { \ - if (vector.empty()) { \ - DOMI_LOGE("param[%s] is empty!", #vector); \ - return ge::FAILED; \ - } \ +#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ + do { \ + if (vector.empty()) { \ + DOMI_LOGE("param[%s] is empty!", #vector); \ + return ge::FAILED; \ + } \ } while (0) // Check if the value on the left is greater than or equal to the value on the right -#define GE_CHECK_GE(lhs, rhs) \ - do { \ - if (lhs < rhs) { \ - DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if (lhs < rhs) { \ + DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the value on the left is less than or equal to the value on the right -#define GE_CHECK_LE(lhs, rhs) \ - do { \ - if (lhs > rhs) { \ - DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if (lhs > rhs) { \ + DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ } while (0) #define GE_DELETE_NEW_SINGLE(var) \ @@ -345,7 +345,7 @@ std::string ToString(const 
google::protobuf::RepeatedField &rpd_field) { /// @return Timestamp, in microseconds (US) /// /// -uint64_t GetCurrentTimestap(); +uint64_t GetCurrentTimestamp(); /// /// @ingroup domi_common diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h index 65897ac5..1bcf5e07 100644 --- a/inc/framework/engine/dnnengine.h +++ b/inc/framework/engine/dnnengine.h @@ -30,6 +30,7 @@ enum PriorityEnum { COST_0 = 0, COST_1, COST_2, + COST_3, COST_9 = 9, COST_10 = 10, }; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 00846112..ba90fd03 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -38,14 +38,14 @@ class DynamicSingleOp; struct RunModelData { uint32_t index; // Data index uint32_t modelId; - std::vector blobs; // All input/output data buffer - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint64_t request_id = 0; // Request ID - uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 - uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 - uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 - std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty + std::vector blobs; // All input/output data buffer + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint64_t request_id = 0; // Request ID + uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 + uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 + uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 + std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not 
supported by default:empty }; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { @@ -163,6 +163,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc); @@ -262,8 +264,10 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op); - static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, - const std::vector &inputs, std::vector &output_desc, + static ge::Status ExecuteAsync(DynamicSingleOp *executor, + const std::vector &input_desc, + const std::vector &inputs, + std::vector &output_desc, std::vector &outputs); static ge::Status ReleaseSingleOpResource(void *stream); diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index 37bca897..4902a021 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -53,7 +53,7 @@ class GeGenerator { Status GenerateOfflineModel(const Graph &graph, const std::string &file_name_prefix, const std::vector &inputs = std::vector()); - Status GenerateOnlineModel(const Graph &graph, const vector &inputs, ge::ModelBufferData &model); + Status GenerateOnlineModel(const Graph &graph, const vector &inputs, ge::ModelBufferData& model); Status GenerateInfershapeGraph(const Graph &graph); @@ -77,15 +77,16 @@ class GeGenerator { /// @param [in] engine_type: specific engine. /// @param [out] model_buff: model buff of single op. 
/// @return SUCCESS or FAILED - Status BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - OpEngineType engine_type, ModelBufferData &model_buff); + Status BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, + const vector &outputs, OpEngineType engine_type, + ModelBufferData &model_buff); private: - Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, - ge::ModelBufferData &model, bool is_offline = true); + Status GenerateModel(const Graph &graph, const string &file_name_prefix, + const vector &inputs, ge::ModelBufferData& model, bool is_offline = true); Status BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, - bool is_offline = true); + const string &model_file_name, OpEngineType engine_type, + ModelBufferData &model_buff, bool is_offline = true); class Impl; diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index ebb7e68c..7c87fe74 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -21,6 +21,7 @@ #include #include "ge/ge_api_error_codes.h" +#include "graph//types.h" #include "runtime/mem.h" namespace ge { @@ -35,6 +36,12 @@ struct HostVarInfo { uint64_t var_size; }; +struct TensorInfo { + std::string var_name; + std::vector dims; + DataType data_type; +}; + /// /// \param size [in] rdma pool memory size to be allocated. /// \param mem_type [in] memory type for rdma pool. @@ -47,6 +54,13 @@ Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); /// \return Status result of function Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); +/// +/// \param tensor_info [in] description for tensor stored shared memory. +/// \param dev_addr [out] malloced shared memory addr. +/// \param memory_size [out] malloced shared memory size. 
+/// \return Status result of function +Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); + /// /// \param var_name [in] var_name name of host variable. /// \param base_addr [out] base_addr vase addr of host variable. diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index bbec014b..4552fa7c 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -33,7 +33,7 @@ class MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, size_t &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index 45a8896d..71f94c98 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -21,7 +21,6 @@ #include #include #include -#include "framework/common/types.h" #include "framework/omg/omg_inner_types.h" #include "framework/omg/parser/parser_inner_ctx.h" #include "proto/ge_ir.pb.h" @@ -92,8 +91,6 @@ void GetGroupName(ge::proto::ModelDef &model); void FindParserSo(const string &path, vector &fileList, string &caffe_parser_path); -Status CheckCustomAiCpuOpLib(); - Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 2f91d7aa..c48d1649 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -25,7 +25,6 @@ #include #include #include "framework/common/fmk_error_codes.h" -#include "framework/common/types.h" #include "register/register_fmk_types.h" using domi::DOMI_TENSOR_ND; @@ -92,12 +91,15 @@ struct OmgContext { std::map> out_nodes_map; // 
user-designate out nodes (this is used for determing the orders) std::vector> user_out_nodes; + // default out nodes (this is used for determing the orders) + std::vector> default_out_nodes; + // save the output node of the network, value = topName, + // topName indicates the output name of the operator. + std::vector user_out_nodes_top_vec; // net out nodes (where user_out_nodes or leaf nodes) std::vector net_out_nodes; // net out nodes top names(only caffe has top) std::vector out_top_names; - // path for the aicpu custom operator so_file - std::vector aicpu_op_run_paths; // preferential format used by the entire network domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; domi::FrameworkType type = domi::FRAMEWORK_RESERVED; diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 00c220f1..94253bf4 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -61,6 +61,16 @@ struct model_feature { float *gradient_time; /**< The BP compution time of each gradient */ }; +/** + * @brief Memory Register Address Struct for Remote Access + */ +struct MemRegisterAddr { + u64 addr; + u64 length; +}; + +const u32 HCCL_MAX_MEM_REGISTER_NUM = 8; // The max number of memory register address. 
+ enum GradSplitForceMode { FORCE_NONE, /**< no force */ FORCE_SIZE, /**< force split gradient by size */ diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h index 276516e7..50a64795 100644 --- a/third_party/fwkacllib/inc/hccl/hccl_types.h +++ b/third_party/fwkacllib/inc/hccl/hccl_types.h @@ -81,6 +81,8 @@ typedef enum { HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ HCCL_DATA_TYPE_RESERVED /**< reserved */ } HcclDataType; diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 4399d3a8..90b96ac7 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -270,6 +270,15 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen */ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +/** + * @brief Register memories and init resources for remote access. + * + * @param addrList memory addresses for remote access. + * @param count number of remote memory addresses. 
+ * @return HcclResult + */ +extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index ce1c9720..f1e30538 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -20,7 +20,7 @@ #define LINUX 0 #define WIN 1 -#if(OS_TYPE == LINUX) //lint !e553 +#if(OS_TYPE == LINUX) #ifndef _GNU_SOURCE #define _GNU_SOURCE @@ -84,7 +84,7 @@ #endif -#if(OS_TYPE == WIN) //lint !e553 +#if(OS_TYPE == WIN) #include #include #include "Windows.h" diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index 07d25fc7..bed984bd 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -18,8 +18,8 @@ * \file aipp.h * \brief */ -#ifndef GE_OP_AIPP_H -#define GE_OP_AIPP_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ #include "graph/operator_reg.h" @@ -40,6 +40,8 @@ image normalization (by subtracting the mean value or multiplying a factor), ima *features: The AIPP-processed output tensor of type float16 or uint8. *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. 
*/ REG_OP(Aipp) .INPUT(images, TensorType{DT_UINT8}) @@ -71,4 +73,4 @@ REG_OP(AippData) .OP_END_FACTORY_REG(AippData) } // namespace ge -#endif // GE_OP_AIPP_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index 84ff3d08..1ac83783 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -18,8 +18,8 @@ * \file all_ops.h * \brief */ -#ifndef BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ -#define BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ #include "aipp.h" #include "array_ops.h" @@ -76,4 +76,4 @@ #include "transformation_ops.h" #include "condtake_ops.h" #include "warp_perspective_ops.h" -#endif // BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 1af02b05..e1f64421 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -18,8 +18,8 @@ * \file array_ops.h * \brief */ -#ifndef GE_OP_ARRAY_OPS_H_ -#define GE_OP_ARRAY_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -659,8 +659,7 @@ REG_OP(IdentityN) .OP_END_FACTORY_REG(IdentityN) /** -*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without - changing the data. \n +*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. \n *@par Inputs: *@li x: A tensor. @@ -738,8 +737,7 @@ REG_OP(Reshape) *x: A tensor. \n *@par Attributes: -*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. -If specified, only squeezes the dimensions listed. 
It is an error to squeeze a dimension that is not 1. \n +*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. \n *@par Outputs: *y: A tensor. \n @@ -754,8 +752,7 @@ REG_OP(Squeeze) .OP_END_FACTORY_REG(Squeeze) /** -*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of -indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n +*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n *@par Inputs: *x: A tensor. \n @@ -889,14 +886,29 @@ REG_OP(ReadVariableOp) .ATTR(dtype, Int, DT_INT32) .OP_END_FACTORY_REG(ReadVariableOp) +/** +*@brief Mark outputs of one sub graph which partitioned by engine type. + +*@par Inputs: +*x: A tensor. \n + +*@par Outputs: +*y: A tensor. \n + +*@par Attributes: +*@li peerIndex: The index of the corresponding 'placeholder' node it's connected to. +*@li parentOpType: Op type of original node. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ REG_OP(End) .INPUT(x, TensorType::ALL()) .OUTPUT(y, TensorType::ALL()) - .ATTR(peerIndex, Int, 0) // the index of the corresponding 'placeholder' node it's connected to - .ATTR(parentOpType, String, "") // op type of original node + .ATTR(peerIndex, Int, 0) + .ATTR(parentOpType, String, "") .OP_END_FACTORY_REG(End) - /** *@brief Operations for writing summary data, for use in analysis and visualization. @@ -964,8 +976,7 @@ REG_OP(ShapeN) *@par Attributes: *@li dtype: Optional. The data type of the output tensor. Defaults to "int32". -*@li init: An optional bool. If true, initializes the returned tensor with the default value of "dtype". -Defaults to "false". 
\n +*@li init: An optional bool. If true, initializes the returned tensor with the default value of "dtype". Defaults to "false". \n *@par Outputs: *y: A tensor. \n @@ -1144,4 +1155,4 @@ REG_OP(EditDistance) } // namespace ge -#endif // GE_OP_ARRAY_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h index 149c57d5..d9883253 100644 --- a/third_party/fwkacllib/inc/ops/audio_ops.h +++ b/third_party/fwkacllib/inc/ops/audio_ops.h @@ -18,8 +18,8 @@ * \file audio_ops.h * \brief */ -#ifndef GE_OP_AUDIO_OPS_H_ -#define GE_OP_AUDIO_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ #include "graph/operator_reg.h" @@ -159,4 +159,4 @@ REG_OP(EncodeWav) .OP_END_FACTORY_REG(EncodeWav) } // namespace ge -#endif // GE_OP_AUDIO_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index 0e1562c0..8a1c5a7b 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -18,8 +18,8 @@ * \file batch_ops.h * \brief */ -#ifndef GE_OP_BATCH_OPS_H_ -#define GE_OP_BATCH_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ #include "graph/operator_reg.h" @@ -158,4 +158,4 @@ REG_OP(UnbatchGrad) .OP_END_FACTORY_REG(UnbatchGrad) } // namespace ge -#endif // GE_OP_BATCH_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index 5b35a38a..5c83e161 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -18,8 +18,8 @@ * \file bitwise_ops.h * \brief */ -#ifndef GE_OP_BITWISE_OPS_H_ -#define GE_OP_BITWISE_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ +#define 
OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ #include "graph/operator_reg.h" @@ -56,4 +56,4 @@ REG_OP(RightShift) } // namespace ge -#endif // GE_OP_BITWISE_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index f1b4e7a9..550e8b7d 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -18,8 +18,8 @@ * \file boosted_trees_ops.h * \brief */ -#ifndef GE_OP_BOOSTED_TREES_OPS_H_ -#define GE_OP_BOOSTED_TREES_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ #include "graph/operator_reg.h" @@ -61,4 +61,4 @@ REG_OP(BoostedTreesBucketize) } // namespace ge -#endif // GE_OP_BOOSTED_TREES_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index 9b9ce314..e20607bf 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -18,8 +18,8 @@ * \file candidate_sampling_ops.h * \brief */ -#ifndef GE_OP_CANDIDATE_SAMPLING_OPS_H_ -#define GE_OP_CANDIDATE_SAMPLING_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ #include "graph/operator_reg.h" @@ -412,4 +412,4 @@ REG_OP(ComputeAccidentalHits) } // namespace ge -#endif // GE_OP_CANDIDATE_SAMPLING_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h index 554c18f1..5e91eb07 100644 --- a/third_party/fwkacllib/inc/ops/condtake_ops.h +++ b/third_party/fwkacllib/inc/ops/condtake_ops.h @@ -18,8 +18,8 @@ * \file condtake_ops.h * \brief */ -#ifndef GE_OP_CONDTAKE_OPS_H_ -#define 
GE_OP_CONDTAKE_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -56,4 +56,4 @@ REG_OP(CondTake) .OP_END_FACTORY_REG(CondTake) } // namespace ge -#endif // GE_OP_ARRAY_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index e2fd4715..7196b14f 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -18,8 +18,8 @@ * \file control_flow_ops.h * \brief */ -#ifndef GE_CONTROL_FLOW_OPS_H_ -#define GE_CONTROL_FLOW_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -404,4 +404,4 @@ REG_OP(MapIndex) .OP_END_FACTORY_REG(MapIndex) } // namespace ge -#endif // GE_CONTROL_FLOW_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index 383568dc..2c75fd09 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -18,8 +18,8 @@ * \file ctc_ops.h * \brief */ -#ifndef GE_OP_CTC_OPS_H -#define GE_OP_CTC_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -139,4 +139,4 @@ REG_OP(CTCBeamSearchDecoder) } // namespace ge -#endif //GE_OP_CTC_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index 3bfcfe01..461b3617 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -18,8 +18,8 @@ 
* \file data_flow_ops.h * \brief */ -#ifndef GE_OP_DATA_FLOW_OPS_H_ -#define GE_OP_DATA_FLOW_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ #include #include "graph/operator_reg.h" @@ -2242,4 +2242,4 @@ REG_OP(OutfeedEnqueueOp) } // namespace ge -#endif // GE_OP_DATA_FLOW_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 2313b4a0..536dea63 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -18,8 +18,8 @@ * \file elewise_calculation_ops.h * \brief */ -#ifndef GE_OP_ELEWISE_CALCULATION_OPS_H -#define GE_OP_ELEWISE_CALCULATION_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -1143,6 +1143,9 @@ REG_OP(Add) *@par Third-party framework compatibility: * Compatible with the TensorFlow operator LRN. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulAdd) @@ -2464,6 +2467,8 @@ REG_OP(PopulationCount) * @li y3: A Tensor. Must be one of the following types: float16, float32. * @li y4: A Tensor. Must be one of the following types: float16, float32. \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LambNextMVWithDecay) .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2510,6 +2515,9 @@ REG_OP(LambNextMVWithDecay) *@li y2: A Tensor. Has the same type as "input_mul3". *@li y3: A Tensor. Has the same type as "input_mul3". *@li y4: A Tensor. Has the same type as "input_mul3". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(LambNextMV) .INPUT(input_mul3, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2548,6 +2556,8 @@ REG_OP(LambNextMV) * @li y1: A Tensor of the same type as "input_square". * @li y2: A Tensor of the same type as "input_square". \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LambNextRight) .INPUT(input_square, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2578,6 +2588,8 @@ REG_OP(LambNextRight) *@par Outputs: *y: A Tensor of the same type as "input_greater1". \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LambUpdateWithLr) .INPUT(input_greater1, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2608,6 +2620,8 @@ REG_OP(LambUpdateWithLr) *@par Outputs: *y: A Tensor of the same type as input. \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LambUpdateWithLrV2) .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2643,6 +2657,8 @@ REG_OP(LambUpdateWithLrV2) * @li output1: A Tensor. Must be one of the following types: float16, float32. * @li output2: A Tensor. Must be one of the following types: float16, float32. \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AdamApplyOneWithDecay) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2683,6 +2699,8 @@ REG_OP(AdamApplyOneWithDecay) * @li output1: A Tensor. Must be one of the following types: float16, float32. * @li output2: A Tensor. Must be one of the following types: float16, float32. \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AdamApplyOne) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2723,6 +2741,8 @@ REG_OP(AdamApplyOne) * @li output1: A Tensor. Must be one of the following types: float16, float32. * @li output2: A Tensor. Must be one of the following types: float16, float32. \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(AdamApplyOneWithDecayAssign) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2763,6 +2783,8 @@ REG_OP(AdamApplyOneWithDecayAssign) * @li output1: A Tensor. Must be one of the following types: float16, float32. * @li output2: A Tensor. Must be one of the following types: float16, float32. \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AdamApplyOneAssign) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2793,6 +2815,8 @@ REG_OP(AdamApplyOneAssign) *@par Outputs: *y: A Tensor of the same type as "x". \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ClipByNormNoDivSum) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2817,6 +2841,9 @@ REG_OP(ClipByNormNoDivSum) *Two outputs, including: \n *@li y1: A Tensor. Has the same type as "x". *@li y2: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(SquareSumV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2839,6 +2866,9 @@ REG_OP(SquareSumV2) *@par Outputs: y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(SquareSumV1) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2857,6 +2887,9 @@ REG_OP(SquareSumV1) *@par Outputs: y1: A Tensor. Has the same type as "x1".The result of "x1". y2: A Tensor. Has the same type as "x2".The result of "x2". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(SquareSumAll) .INPUT(x1, TensorType({DT_FLOAT})) @@ -2876,6 +2909,9 @@ REG_OP(SquareSumAll) *@par Outputs: * y: A Tensor. Has the same type as "x1". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulAddN) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) @@ -2942,6 +2978,9 @@ If false, don’t keep these dimensions. Default:False. 
\n *@par Outputs: *@li output0: A Tensor result of which input0 dot multily input1. *@li output1: A Tensor result of which input0 dot multily input1, then reducesum it. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ConfusionMulGrad) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2965,6 +3004,9 @@ REG_OP(ConfusionMulGrad) *@li y1: A Tensor of shape and dtype of first output, which should have \n shape (1,) and dtype as input. *@li y2: A Tensor of shape and dtype of second output, should be same shape and type as input. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulAddNL2loss) .INPUT(x1, TensorType::NumberType()) @@ -3186,6 +3228,9 @@ REG_OP(KLDiv) *y: A Tensor. Has the same type as "x". \n *@par Third-party framework compatibility + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(TensorMove) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) @@ -3197,23 +3242,21 @@ REG_OP(TensorMove) *@par Inputs: *One inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, bool. \n +* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. \n *@par Outputs: -*x: A Tensor. Has the same type as "x". \n +*output_x: A Tensor. Has the same type as "x". 
\n *@par Third-party framework compatibility */ REG_OP(TensorRedirect) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, - DT_INT64, DT_INT16, DT_UINT16, DT_DOUBLE, - DT_COMPLEX64})) + DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, - DT_INT64, DT_INT16, DT_UINT16, DT_DOUBLE, - DT_COMPLEX64})) + DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) .OP_END_FACTORY_REG(TensorRedirect) } // namespace ge -#endif // GE_OP_ELEWISE_CALCULATION_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 1e67c41f..598d3ad3 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -18,8 +18,8 @@ * \file functional_ops.h * \brief */ -#ifndef GE_FUNCTIONAL_OPS_H_ -#define GE_FUNCTIONAL_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -330,4 +330,4 @@ REG_OP(StatefulPartitionedCall) } // namespace ge -#endif // GE_FUNCTIONAL_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h index 33a64903..33dc4f14 100644 --- a/third_party/fwkacllib/inc/ops/get_data_ops.h +++ b/third_party/fwkacllib/inc/ops/get_data_ops.h @@ -18,8 +18,8 @@ * \file get_data_ops.h * \brief */ -#ifndef GE_OP_GET_DATA_OPS_H_ -#define GE_OP_GET_DATA_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ #include "graph/operator_reg.h" @@ -100,4 +100,4 @@ REG_OP(DeviceQueueDataset) } // namespace ge -#endif // GE_OP_GET_DATA_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ diff --git 
a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index 7e985efc..1fe9055c 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -18,8 +18,8 @@ * \file hcom_ops.h * \brief huawei collective communication library ops. */ -#ifndef GE_OP_HCOM_OPS_H_ -#define GE_OP_HCOM_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ #include "graph/operator_reg.h" @@ -41,8 +41,8 @@ namespace ge { as the name of a world group. */ REG_OP(HcomAllGather) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .REQUIRED_ATTR(rank_size, Int) .REQUIRED_ATTR(group, String) .ATTR(alpha, Float, 1.0) @@ -99,8 +99,8 @@ REG_OP(HcomAllReduce) as the name of a world group. 
*/ REG_OP(HcomBroadcast) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) - .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .REQUIRED_ATTR(root_rank, Int) .REQUIRED_ATTR(group, String) .ATTR(alpha, Float, 1.0) @@ -157,7 +157,7 @@ REG_OP(HcomReduceScatter) * @see HcomReceive */ REG_OP(HcomSend) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(dest_rank, Int) @@ -190,7 +190,7 @@ REG_OP(HcomSend) * @see HcomSend */ REG_OP(HcomReceive) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(src_rank, Int) @@ -200,5 +200,30 @@ REG_OP(HcomReceive) .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomReceive) +/** + * @brief Performs Remote Read of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length + * @par Outputs: + * local: A Tensor. whose value is length / size_of(Type) + */ +REG_OP(HcomRemoteRead) + .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) + .OUTPUT(local, TensorType::ALL()) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(HcomRemoteRead) + +/** + * @brief Performs Remote Write of input tensors + * @par Inputs: + * remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length + * @par Inputs: + * local: A Tensor. 
whose value is length / size_of(Type) + */ +REG_OP(HcomRemoteWrite) + .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) + .INPUT(local, TensorType::ALL()) + .OP_END_FACTORY_REG(HcomRemoteWrite) + } // namespace ge -#endif // GE_OP_HCOM_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h index bde8486c..a49ec5ed 100644 --- a/third_party/fwkacllib/inc/ops/hvd_ops.h +++ b/third_party/fwkacllib/inc/ops/hvd_ops.h @@ -18,8 +18,8 @@ * \file hvd_ops.h * \brief Horovod collective communication library ops. */ -#ifndef GE_OP_HVD_OPS_H_ -#define GE_OP_HVD_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ #include "graph/operator_reg.h" @@ -78,4 +78,4 @@ REG_OP(HorovodBroadcast) .OP_END_FACTORY_REG(HorovodBroadcast) } // namespace ge -#endif // GE_OP_HVD_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 27fb79a9..ce3262f9 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -18,8 +18,8 @@ * \file image_ops.h * \brief */ -#ifndef GE_OP_MAGE_OPS_H_ -#define GE_OP_MAGE_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ #include "graph/operator_reg.h" @@ -1344,4 +1344,4 @@ REG_OP(SpatialTransformerD) } // namespace ge -#endif // GE_OP_MAGE_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index 2f9906fc..9dde14a5 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -18,8 +18,8 @@ * \file internal_ops.h * \brief */ -#ifndef GE_OP_INTERNAL_OPS_H_ -#define GE_OP_INTERNAL_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_ +#define 
OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -68,6 +68,9 @@ REG_OP(CacheUpdate) *@par Outputs: *The output is dynamic for attribute func_name. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(InternalDataMove) .INPUT(x, TensorType::ALL()) @@ -78,4 +81,4 @@ REG_OP(InternalDataMove) } // namespace ge -#endif // GE_OP_INTERNAL_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index 5d98f999..7a6fbc59 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -18,8 +18,8 @@ * \file linalg_ops.h * \brief */ -#ifndef GE_OP_LINALG_OPS_H_ -#define GE_OP_LINALG_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -432,4 +432,4 @@ REG_OP(TridiagonalSolve) } // namespace ge -#endif // GE_OP_LINALG_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index db9097ce..bc8ae2b8 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -18,8 +18,8 @@ * \file logging_ops.h * \brief */ -#ifndef GE_OP_LOGGING_OPS_H -#define GE_OP_LOGGING_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -113,4 +113,4 @@ REG_OP(PrintV2) .OP_END_FACTORY_REG(PrintV2) } // namespace ge -#endif // GE_OP_LOGGING_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index 84b138c4..b37ab048 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ 
b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -18,8 +18,8 @@ * \file lookup_ops.h * \brief */ -#ifndef GE_OP_LOOKUP_OPS_H_ -#define GE_OP_LOOKUP_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ #include "graph/operator_reg.h" @@ -305,4 +305,4 @@ REG_OP(MutableHashTable) .OP_END_FACTORY_REG(MutableHashTable) } // namespace ge -#endif // GE_OP_LOOKUP_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 3d7ff1d9..2b0783bf 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -18,8 +18,8 @@ * \file math_ops.h * \brief */ -#ifndef GE_OP_MATH_OPS_H_ -#define GE_OP_MATH_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -511,6 +511,23 @@ REG_OP(IsFinite) .OUTPUT(y, TensorType({DT_BOOL})) .OP_END_FACTORY_REG(IsFinite) +/** + * *@brief Compute element-wise infiniteness, return a boolean tensor. + * + * *@par Inputs: + * *x:A Tensor. + * + * *@par Outputs: + * *y:A Tensor. Has the same shape as x. + * + * *@par Third-party framework compatibility. + * *Compatible with tensorflow IsInf operator. + * */ +REG_OP(IsInf) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(IsInf) + /** * *@brief Computes the complex absolute value of a tensor. 
* @@ -677,4 +694,4 @@ REG_OP(IFMR) .OP_END_FACTORY_REG(IFMR) } // namespace ge -#endif // GE_OP_MATH_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index bceff0cd..ed23d3f6 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -18,8 +18,8 @@ * \file matrix_calculation_ops.h * \brief */ -#ifndef GE_OP_MATRIX_CALCULATION_OPS_H -#define GE_OP_MATRIX_CALCULATION_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ #include "graph/operator_reg.h" @@ -95,6 +95,10 @@ REG_OP(MatMulV2) /** *@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . \n +*@attention Constraints: +* For better performance, The k-axis must be aligned to 16 (input type +* is float16) or 32 (input type is int8). \n + *@par Inputs: *Five inputs, including: *@li a: A matrix Tensor. Must be one of the following types: float16, int8. @@ -398,8 +402,8 @@ REG_OP(TensorScatterUpdate) *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation - * will be protected by a lock . \n +* use_locking: An optional bool. Defaults to "False". If "True", the operation +* will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -430,7 +434,7 @@ REG_OP(ScatterAdd) *@par Attributes: *@li use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock . \n +* the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . 
\n @@ -459,7 +463,7 @@ REG_OP(ScatterDiv) *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock . \n +* the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -488,7 +492,7 @@ REG_OP(ScatterNdAdd) *Must be one of the following types: int32 *@li updates: An ND Tensor. \n -*Must be one of the following types: float16, float32, int32, int8, uint8 +* Must be one of the following types: float16, float32, int32, int8, uint8 *@par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n @@ -517,10 +521,10 @@ REG_OP(TensorScatterAdd) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock . \n +* the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdSub. @@ -549,7 +553,7 @@ REG_OP(ScatterNdSub) *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Outputs: -*y: A Tensor. Has the same type and format as input "x" . \n +* y: A Tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterSub. @@ -574,10 +578,10 @@ REG_OP(TensorScatterSub) *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock . \n +* the operation will be protected by a lock . \n *@par Outputs: -*var: A Tensor. Has the same type and format as input "var" . \n +* var: A Tensor. Has the same type and format as input "var" . 
\n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterSub. @@ -647,7 +651,7 @@ REG_OP(DiagPart) *@li num_output: Reserved. *@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false". *@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. - * The product of the subsequent dimensions starting form first dimension or the second dimension is "K". +* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". *@li offset_x: Reserved . \n *@par Outputs: @@ -764,7 +768,7 @@ REG_OP(ConfusionMatrix) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation - * will be protected by a lock . \n +* will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -797,7 +801,7 @@ REG_OP(ScatterMul) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation - * will be protected by a lock . \n +* will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -830,7 +834,7 @@ REG_OP(ScatterMin) *@par Attributes: *use_locking: An optional bool. Defaults to "False". - * If "True", the operation will be protected by a lock . \n +* If "True", the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -863,7 +867,7 @@ REG_OP(ScatterMax) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", - * the operation will be protected by a lock . \n +* the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . 
\n @@ -977,4 +981,4 @@ REG_OP(MatrixDiagV2) } // namespace ge -#endif // GE_OP_MATRIX_CALCULATION_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/max_pool_v3.h b/third_party/fwkacllib/inc/ops/max_pool_v3.h new file mode 100644 index 00000000..960ea03e --- /dev/null +++ b/third_party/fwkacllib/inc/ops/max_pool_v3.h @@ -0,0 +1,77 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MAX_POOL_V3_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MAX_POOL_V3_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +* @brief Performs max pooling on the input . \n + +* @par Inputs: +* One input: +* x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, +* int32, int64, uint8, uint16, qint8 + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, +* specifying the size of the window for each dimension of the input tensor. +* No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, +* specifying the stride of the sliding window for each dimension of +* the input tensor. No default value. +* @li padding_mode: A required string. Defaults to "CALCULATED". +* @li pads:A required list of int8, int16, int32, or int64 values, +* a data to calculate when padding_mode is "SAME" and "CALCULATED". +* @li data_format: An optional string.
Defaults to "NHWC" . +* @li global_pooling bool, Whether to use the global pooling. +* If global_pooling = true, kernel size and paddings will be ignored. +* Default False +* @li ceil_mode: Whether to use the ceil function to calculate output height +* and width. If false, the floor function will be used. +* Default False \n + +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x" . \n + +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, +* ksize[1] * ksize[2] <= 255. +* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1, +* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +* @li "padding" is "SAME" "VALID" or "CALCULATED" . + + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPool. +*/ +REG_OP(MaxPoolV3) + .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0,0,0,0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling,Bool,false) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolV3) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_MAX_POOL_V3_H_ diff --git a/third_party/fwkacllib/inc/ops/max_pool_v3_grad.h b/third_party/fwkacllib/inc/ops/max_pool_v3_grad.h new file mode 100644 index 00000000..fbb96bdf --- /dev/null +++ b/third_party/fwkacllib/inc/ops/max_pool_v3_grad.h @@ -0,0 +1,80 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + *\file max_pool_v3_grad.h + *\brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_MAX_POOL_V3_GRAD_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_MAX_POOL_V3_GRAD_H_ + +#include "graph/operator_reg.h" + +namespace ge { + +/** +* @brief Computes gradients of the maxpooling function . \n + +* @par Inputs: +* @li orig_input: A mutable NC1HWC0 tensor of type RealNumberType. +* @li orig_output: A mutable NC1HWC0 tensor of type RealNumberType. +* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, +* specifying the size of the window for each dimension of the input tensor. +* No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, +* specifying the stride of the sliding window for each dimension of +* the input tensor. No default value. +* @li padding_mode: A required string. Defaults to "CALCULATED". +* @li pads:A required list of int8, int16, int32, or int64 values, +* a data to calculate when padding_mode is "SAME" and "CALCULATED". +* @li data_format: An optional string. Defaults to "NHWC" . +* @li global_pooling bool, Whether to use the global pooling. +* If global_pooling = true, kernel size and paddings will be ignored. +* Default False +* @li ceil_mode: Whether to use the ceil function to calculate output height +* and width. If false, the floor function will be used. +* Default False \n + +* @par Outputs: +* y: A mutable tensor. Has the same shape and type as "x1" .
\n + +* @attention Constraints: +* @li Computing gradients of global pooling is not supported, which means +* "ksize < x1". +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGrad. +*/ +REG_OP(MaxPoolV3Grad) + .INPUT(orig_input, TensorType::RealNumberType()) + .INPUT(orig_output, TensorType::RealNumberType()) + .INPUT(grad, TensorType::RealNumberType()) + .OUTPUT(out_grad, TensorType::RealNumberType()) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mod, String, "CALCULATED") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolV3Grad) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_MAX_POOL_V3_GRAD_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 4fa85cbc..0c6a5dff 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -18,8 +18,8 @@ * \file nn_batch_norm_ops.h * \brief */ -#ifndef GE_OP_NN_BATCH_NORM_OPS_H -#define GE_OP_NN_BATCH_NORM_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ #include "graph/operator_reg.h" @@ -378,4 +378,4 @@ REG_OP(BNInferenceD) } // namespace ge -#endif // GE_OP_NN_BATCH_NORM_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 5a02c1ca..e2d610c5 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -18,8 +18,8 @@ * \file nn_calculation_ops.h * \brief */ -#ifndef GE_OP_NN_CALCULATION_OPS_H -#define GE_OP_NN_CALCULATION_OPS_H +#ifndef 
OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ #include "graph/operator_reg.h" @@ -460,9 +460,9 @@ REG_OP(Conv2DBackpropInputD) *@par Attributes: * Six attributes: * @li strides: A tuple or list of 2 integers. The stride of the sliding window - * for H/W dimension. + * for H/W dimension, defaults to [1,1]. * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] - * padding on the feature map. + * padding on the feature map, defaults to [0,0,0,0]. * @li dilations: A tuple or list of 4 integers. The dilation factor for each * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to @@ -482,8 +482,8 @@ REG_OP(Deconvolution) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) - .REQUIRED_ATTR(strides, ListInt) - .REQUIRED_ATTR(pads, ListInt) + .ATTR(strides, ListInt, {1, 1}) + .ATTR(pads, ListInt, {0, 0, 0, 0}) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(groups, Int, 1) .ATTR(data_format, String, "NCHW") @@ -585,14 +585,138 @@ REG_OP(Conv2DBackpropFilterD) /** *@brief Computes a 2D convolution given 4D "x" and "filter" tensors. *@par Inputs: +*@li x: A 4D tensor of input images. With "NHWC" format, the shape is +* [batch, in_height, in_width, in_channels]. +*@li filter: A 4D tensor of filters. Has the same type as "x". With "HWCN" +* format, the shape is [filter_height, filter_width, in_channels, +* out_channels]. + +*@li bias: An optional 1D tensor. Shape is [out_channels]. +*@li offset_w: An optional 1D tensor for quantized convolution. Shape is +* [out_channels]. Not supported. 
+*\n +*\n +* Note that there is a strict data type mapping between the input and output +* tensors: +*@verbatim + |Tensor | x | filter | bias | offset_w | y + -----------|---------|---------|---------|----------|-------- + |Data Type | float16 | float16 | float16 | _ | float16 + | |---------|---------|---------|----------|-------- + | | float32 | float32 | float32 | _ | float32 + | |---------|---------|---------|----------|-------- + | | int8 | int8 | int32 | int8 | int32 + -----------|---------|---------|---------|----------|-------- + |Format | NCHW | NCHW | ND | ND | NCHW + | | NHWC | HWCN | | | NHWC +@endverbatim +* Type float32 is allowed only in mixed precision (float32->float16) scenarios. +* Mixed precision is enabled by default. +* \n +* +*@par Attributes: +*@li strides: Required. A list of 4 integers. Specifying the strides of the +* convolution along the height and width. The dimension order is determined +* by the data format of "x". By default the N and C dimensions are set to 1. +*@li pads: Required. A list of 4 integers. Specifying the top, bottom, left +* and right padding. +* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate +* to use for dilated convolution. Has the same dimension order and value as +* "strides". Dilation > 1 is not supported for quantized convolution. Defaults +* to [1, 1, 1, 1]. +* @li groups: Optional. An integer of type int32, for the number of blocked +* connections from input channels to output channels. Input channels and output +* channels must both be divisible by "groups". "x" in_channels must be equal to +* "filter" in_channels * groups. Defaults to 1. +* @li offset_x: Optional. An integer of type int32, for quantized convolution. +* Defaults to 0. +* @li data_format: Reserved and optional. A string from: "NHWC" and "NCHW". +* Specifying the data format of the input and output images. Defaults to +* "NHWC". 
+*\n +*\n +* The following value range restrictions must be met: +*@verbatim + |Name | Field | Scope + ------------------|----------|---------- + |Input Image Size | H | [1, 100000] + | | W | [1, 4096] + ------------------|----------|---------- + |Filter Size | H | [1, 255] + | | W | [1, 255] + ------------------|----------|---------- + |Stride | H | [1, 63] + | | W | [1, 63] + ------------------|----------|---------- + |Padding | top | [0, 255] + | | bottom | [0, 255] + | | left | [0, 255] + | | right | [0, 255] + ------------------|----------|---------- + |Dilation | H | [1, 255] + | | W | [1, 255] +@endverbatim +* +*@par Outputs: +*@li y: A 4D Tensor of output images. Has the same type and format as "x". With +* "NHWC" format, the shape is [batch, out_height, out_width, out_channels]. +*\n +* out_height = (in_height + top_pad + bottom_pad - +* dilation_h * (filter_height - 1) - 1) +* / stride_h + 1 +*\n +* out_width = (in_width + left_pad + right_pad - +* dilation_w * (filter_width - 1) - 1) +* / stride_w + 1 +* +*@attention Constraints: +*@li The following restrictions on the output must be met: +*@verbatim + | Output | Restrictions + -------------------|--------------------------- + | W dimension == 1 | H*W(input) == H*W(filter) + | H dimension == 1 | + -------------------|--------------------------- + | W dimension == 1 | Not supported + | H dimension != 1 | +@endverbatim +* "H * W (input)" indicates the image size after padding and "H * W (filter)" +* indicates the filter size after dilation. +*\n +* +*@par Quantization supported or not +*@li Yes +* +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator "conv2d". +*@li Compatible with the Caffe operator 2D "Convolution". 
+*/ +REG_OP(Conv2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv2D) + +/** +*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. +*@par Inputs: * @li x: A 4D tensor of input images. -* @li filter: A 4D tensor of filters. +* @li filter_compress: A 4D tensor of compressed filters. +* @li compress_index: A 1D Tensor dtype of int8. * @li bias: An optional 1D tensor. * @li offset_w: An optional 1D tensor for quantized convolution. Reserved. * * The input and output tensor attributes are listed as follows: * @verbatim - |Tensor | x | filter | bias | offset_w | y + |Tensor | x | filter_compress | bias | offset_w | y -----------|---------|---------|---------|----------|-------- |Data Type | float16 | float16 | float16 | _ | float16 | |---------|---------|---------|----------|-------- @@ -618,48 +742,127 @@ REG_OP(Conv2DBackpropFilterD) * @li groups: Number of blocked connections from input channels to output * channels. Input channels and output channels must both be divisible by * "groups".Type is int32. -* @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0". +* @li offset_x: An optional integer for quantized convolution. Type is int32. +* Defaults to "0". * @li data_format: An optional string from: "NHWC", "NCHW". Specifying the -* data format of the input and output images. Type is string. Defaults to "NHWC". Reserved . \n +* data format of the input and output images. Type is string. +* Defaults to "NHWC". Reserved . 
\n *@par Outputs: * @li y: A 4D Tensor of output images . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. +*/ +REG_OP(Conv2DCompress) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(compress_index, TensorType({DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NHWC") + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv2DCompress) + +/** +*@brief Computes a 2D convolution given 4D "x", "filter" and "offsets" +* tensors. +*@par Inputs: +* @li x: A 4D tensor of input images. With shape of +* [batch, in_height, in_width, in_channels] when format is "NHWC". +* @li filter: A 4D tensor of filters. Must have the same type as "x". With +* shape of [filter_height, filter_width, in_channels, out_channels] when format +* is "HWCN". +* @li offsets: A 4D tensor of offsets. With shape of +* [batch, deformable_groups * filter_height * filter_width * 3, in_height, +* in_width] when format is "NCHW". +* @li bias: An optional 1D tensor. Shape is [out_channels]. +* +* The input and output tensor attributes are listed as follows: +* @verbatim + |Tensor | x | filter | offsets | bias | y + -----------|---------|---------|---------|----------|-------- + |Data Type | float16 | float16 | float16 | float16 | float16 + -----------|---------|---------|---------|----------|-------- + |Format | NCHW | NCHW | NCHW | ND | NCHW + | | NHWC | HWCN | | | NHWC +@endverbatim +* It should be noted that the data types must correspond to each other, but +* the format does not need to. + +*@par Attributes: +* @li strides: Required. A list of 4 integers. 
Specifying the strides of the +* convolution along the height and width. The dimension order is determined +* by the data format of "x". By default the N and C dimensions are set to 1. +* @li pads: Required. A list of 4 integers. Specifying the top, bottom, left +* and right padding. +* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate +* to use for dilated convolution. Has the same dimension order and value as +* "strides". +* @li groups: Optional. Number of blocked connections from input channels to +* output channels. Input channels and output channels must both be divisible +* by "groups".Type is int32. +* @li data_format: Optional. An optional string from: "NHWC", "NCHW". Specifying the +* data format of the input and output images. Type is string. Defaults to +* "NHWC". Reserved. +* @li deformable_groups: Optional. Cut the c chanel of input X into deformable_groups, +* each share a different offsets. Input channels must be divisible by +* "deformable_groups". Type is int32. + +*@par Outputs: +* @li y: A 4D Tensor of output images. Must have the same type and format as +* "x". With shape of [batch, out_channels, out_height, out_width] when format +* is "NHWC". 
+* @li output_height = (in_height + top_pad + bottom_pad - +* dilation_h * (filter_height - 1) -1) / stride_h + 1 +* @li output_width = (in_width + left_pad + right_pad - +* dilation_w * (filter_width - 1) -1) / stride_w + 1 + *@attention * @li The parameter scope is listed as follows: * @verbatim |Name | Field | Scope - ------------------|--------------|---------- - |Input Image Size | H dimension | [1, 4096] - | | W dimension | [1, 4096] - ------------------|--------------|---------- + ------------------|--------------|---------------------------------------- + |Input Image Size | H dimension | 1 <= in_height * filter_height <= 4096 + | | W dimension | 1 <= in_width * filter_width <= 4096 + ------------------|--------------|---------------------------------------- |Filter Size | H dimension | [1, 255] | | W dimension | [1, 255] - ------------------|--------------|---------- + ------------------|--------------|---------------------------------------- + |offsets Size | C dimension | offsets_c = deformable_groups * + | | | filter_width * filter_height * 3 + | | H dimension | the same as output H dimension + | | W dimension | the same as output W dimension + ------------------|--------------|---------------------------------------- |Stride Size | H dimension | [1, 63] | | W dimension | [1, 63] - ------------------|--------------|---------- + ------------------|--------------|---------------------------------------- |Padding Size | top side | [0, 255] | | bottom side | [0, 255] | | left side | [0, 255] | | right side | [0, 255] - ------------------|--------------|---------- + ------------------|--------------|---------------------------------------- |Dilation Size | H dimension | [1, 255] - | W dimension | [1, 255] + | | W dimension | [1, 255] @endverbatim * @li There are restrictions for certain scenarios: * @verbatim - Output | Restrictions - ------------------|---------------------------------------------- - W dimension == 1 | HxW(input) == HxW(filter) - H dimension 
== 1 | - ------------------|---------------------------------------------- - W dimension == 1 | Not supported - H dimension != 1 | + | Output | Restrictions + -------------------|--------------------------- + | W dimension == 1 | HxW(input) == HxW(filter) + | H dimension == 1 | + -------------------|--------------------------- + | W dimension == 1 | Not supported + | H dimension != 1 | @endverbatim * As shown above, "HxW(input)" indicates the image size after padding and -* "HxW(filter)" indicates the filter size after dilation . \n +* "HxW(filter)" indicates the filter size after dilation. *@par Quantization supported or not * Yes @@ -668,34 +871,19 @@ REG_OP(Conv2DBackpropFilterD) *@li Compatible with the TensorFlow operator "conv2d". *@li Compatible with the Caffe operator 2D "Convolution". */ -REG_OP(Conv2D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .REQUIRED_ATTR(strides, ListInt) - .REQUIRED_ATTR(pads, ListInt) - .ATTR(dilations, ListInt, {1, 1, 1, 1}) - .ATTR(groups, Int, 1) - .ATTR(data_format, String, "NHWC") - .ATTR(offset_x, Int, 0) - .OP_END_FACTORY_REG(Conv2D) - -REG_OP(Conv2DCompress) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) - .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) - .INPUT(compress_index, TensorType({DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) - .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) +REG_OP(DeformableConv2D) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(filter, TensorType({DT_FLOAT16})) + .INPUT(offsets, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OUTPUT(y, 
TensorType({DT_FLOAT16})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(groups, Int, 1) .ATTR(data_format, String, "NHWC") - .ATTR(offset_x, Int, 0) - .OP_END_FACTORY_REG(Conv2DCompress) + .ATTR(deformable_groups, Int, 1) + .OP_END_FACTORY_REG(DeformableConv2D) /** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. @@ -1206,5 +1394,39 @@ REG_OP(Conv2DTransposeD) .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(Conv2DTransposeD) +/** +*@brief In the deformable convolution operator, the original input FeatureMap is expanded to a ksize_y * H * ksize_x *W +*FeatureMap by bilinear interpolation according to the offset offset. +*@par Inputs: + * Four inputs: + * @li x: A Tensor of type float16 + * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. +*@par Required Attributes: + * @li strides: A tuple/list of 2 integers.The stride of the sliding window for + * height and width for H/W dimension. + * @li pads: A tuple/list of 4 integers.Padding added to each dimension + * of the input. + * @li ksize: A tuple/list of 2 integers.kernel size. +*@par Attributes: + * Three attributes: + * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension + * of input. Defaults to [0, 0, 0, 0] + * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. + * @li deformable_groups: Specify the c-axis grouping number of input x. +*@par Outputs: + * y: A Tensor. A Tensor of type float16. 
+*/ +REG_OP(DeformableOffsets) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .ATTR(dilations, ListInt, {0, 0, 0, 0}) + .ATTR(data_format, String, "NCHW") + .ATTR(deformable_groups, Int, 1) + .OP_END_FACTORY_REG(DeformableOffsets) + } // namespace ge -#endif // GE_OP_NN_CALCULATION_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index d9c28087..a013fb33 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -18,8 +18,8 @@ * \file nn_detect_ops.h * \brief */ -#ifndef GE_OP_NN_DETECT_OPS_H_ -#define GE_OP_NN_DETECT_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -518,11 +518,11 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n *@par Outputs: *@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], -where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. *@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], -where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. 
*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], -where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n +* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n *@attention Constraints: *@li This operator applies to YOLO v2 and v3 networks. @@ -550,9 +550,9 @@ REG_OP(Yolo) *@par Inputs: * Four inputs, including: *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput. -Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li img_info: A float16 or float32, describing the image information including the required image height and width -and the actual image height and width. +* and the actual image height and width. * *@par Attributes: *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" @@ -561,7 +561,7 @@ and the actual image height and width. *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20]. *@li relative: An optional bool. Defaults to and must be "true". *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, -which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n +* which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n *@li post_nms_topn: An optional int32. This attribute is reserved. 
*@li score_threshold: A required float, specifying the class score threshold for box filtering, @@ -608,11 +608,11 @@ REG_OP(YoloV2DetectionOutput) *@par Inputs: *Six inputs, including: *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput. -Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li imginfo: A float16, describing the image information including the required image height and width -and the actual image height and width. +* and the actual image height and width. *@li windex: A windex tensor with shape [height, weight]. Has the same type as the inputs. -[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. +* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. *@li hindex: A hindex tensor with shape [height, weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]]. @@ -673,10 +673,10 @@ REG_OP(YoloV2DetectionOutputD) *@par Inputs: *Ten inputs, including: *@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". -There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. +* There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. *@li img_info: A float16 or float32, describing the image information including the required image height and width -and the actual image height and width. -* +* and the actual image height and width. 
+ *@par Attributes: *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -691,13 +691,13 @@ and the actual image height and width. *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". -* + *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 -* + *@attention Constraints: *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators . \n @@ -739,16 +739,15 @@ REG_OP(YoloV3DetectionOutput) *@par Inputs: *16 Input, including: *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. -A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li imginfo: A float16, describing the image information including the required image height and width -and the actual image height and width. +* and the actual image height and width. *@li windex: A windex tensor with shape [height,weight]. 
Has the same type as the inputs. -[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n +* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. -[[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n - -* +* [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n +* *@par Attributes: *@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -760,13 +759,13 @@ and the actual image height and width. *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". -* + *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. 
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 -* + *@attention Constraints: *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. @@ -817,8 +816,8 @@ REG_OP(YoloV3DetectionOutputD) *@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. *@li img_info: A float16 or float32, describing the image information including the required image height and width \n -and the actual image height and width. -* +* and the actual image height and width. + *@par Attributes: *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -833,13 +832,13 @@ and the actual image height and width. *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". -* + *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. 
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 -* + *@attention Constraints:\n *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. @@ -868,21 +867,19 @@ REG_OP(YoloV3DetectionOutputV2) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) /** -*@brief Performs YOLO V3 detection . \n +*@brief Performs YOLO V3 detection. *@par Inputs: *16 Input, including: *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. -A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li imginfo: A float16, describing the image information including the required image height and width -and the actual image height and width. +* and the actual image height and width. *@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. -[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] -is formed for the three Yolo outputs, respectively .It's a dynamic input. \n +* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] +* is formed for the three Yolo outputs, respectively .It's a dynamic input. \n *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n - -* *@par Attributes: *@li biases: A required float32. 
"biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -897,6 +894,7 @@ is formed for the three Yolo outputs, respectively .It's a dynamic input. \n * *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), +* describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 @@ -907,6 +905,9 @@ is formed for the three Yolo outputs, respectively .It's a dynamic input. \n *@see Yolo() *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. */ REG_OP(YoloV3DetectionOutputV2D) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -1023,18 +1024,21 @@ REG_OP(ROIPooling) /** *@brief Computes decode bbox function. -* + *@par Inputs: *Inputs include: * @li box_predictions: A Tensor. Must be float16. * @li anchors: A Tensor. Must have the same type as box_predictions. -* + *@par Attributes: * @ decode_clip: required, float, threahold of decode process. -* + *@par Outputs: * @ decoded_boxes: A Tensor. Must have the same type as box_predictions. * N-D with shape [N, 4]. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeBbox) .INPUT(box_predictions, TensorType{DT_FLOAT16}) @@ -1052,6 +1056,9 @@ REG_OP(DecodeBbox) *@par Outputs: *boxes_output: A Tensor. Must have the same type as boxes_output. 
N-D with shape [N, 4]. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ClipBoxes) .INPUT(boxes_input, TensorType({DT_FLOAT16})) @@ -1196,12 +1203,12 @@ REG_OP(RpnProposalsD) /** *@brief Computes Score Filte Pre-Sort function. -* + *@par Inputs: *Inputs include: * @li rois: A Tensor. Must be float16. N-D with shape [N, 4]. * @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1]. -* + *@par Attributes: * @li score_threshold: required, float, threahold of topk process. * @li k: required, Int, threahold of topk process. @@ -1262,14 +1269,17 @@ REG_OP(RpnProposalPostProcessing) .OP_END_FACTORY_REG(RpnProposalPostProcessing) /** *@brief Computes DecodeBoundariesTarget function. -* + *@par Inputs: *Inputs include: * @li boundary_predictions: A Tensor. Must be float16. * @li anchors: A Tensor. Must be float16. -* + *@par Outputs: * @ boundary_encoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeBoundariesTarget) .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) @@ -1287,6 +1297,9 @@ REG_OP(DecodeBoundariesTarget) * *@par Outputs: * @ keypoints_decoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeCornerpointsTargetBG) .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) @@ -1304,6 +1317,9 @@ REG_OP(DecodeCornerpointsTargetBG) * *@par Outputs: * @ keypoints_decoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeCornerpointsTargetWrtCenterV1) .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) @@ -1321,6 +1337,9 @@ REG_OP(DecodeCornerpointsTargetWrtCenterV1) * *@par Outputs: * @ boundary_encoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(DecodeWheelsTarget) .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) @@ -1453,7 +1472,21 @@ REG_OP(DecodeBboxV2) .ATTR(reversed_box, Bool, false) .OP_END_FACTORY_REG(DecodeBboxV2) - +/** +*@brief Computes sort function. +* +*@par Inputs: +*Inputs include: +* x: A Tensor. Must be float16 or float32. +* +*@par Attributes: +* @li axis: optional, int. +* @li descending: optional,bool. +* +*@par Outputs: +* @li y1: A Tensor. Must have the same type as x. +* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. +*/ REG_OP(Sort) .INPUT(x, TensorType({ DT_FLOAT16 })) .OUTPUT(y1, TensorType({ DT_FLOAT16 })) @@ -1462,7 +1495,6 @@ REG_OP(Sort) .ATTR(descending, Bool, false) .OP_END_FACTORY_REG(Sort) - } // namespace ge -#endif // GE_OP_NN_DETECT_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 6d4f6f9d..35c4c7d4 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -18,8 +18,8 @@ * \file nn_norm_ops.h * \brief */ -#ifndef GE_OP_NN_NORM_OPS_H -#define GE_OP_NN_NORM_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -159,6 +159,34 @@ REG_OP(SigmoidCrossEntropyWithLogits) .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) +/** +*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n + +*@par Inputs: +* four inputs, including: +*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. +*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n +*@li weight: An multi-dimensional Tensor, specifying the weight value. \n +*@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. 
\n + +*@par Attributes: +*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n + +*@par Outputs: +*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n + +*@par Third-party framework compatibility +* Compatible with PyTorch operator BCEWithLogitsLoss. +*/ +REG_OP(SigmoidCrossEntropyWithLogitsV2) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) + /** *@brief Computes the regression box of the RPN. It is a FasterRCNN operator . \n @@ -335,6 +363,8 @@ REG_OP(LogSoftmaxV2) *@par Outputs: * y: A Tensor of the same type as "grad" . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ConfusionSoftmaxGrad) .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -499,6 +529,9 @@ REG_OP(LayerNorm) * @li pd_x: A Tensor. Must be one of the following types: float16, float32. * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormGrad) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -540,6 +573,9 @@ REG_OP(LayerNormGrad) *@par Outputs: *Three outputs, including: * @li pd_x: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(LayerNormXBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -579,6 +615,9 @@ REG_OP(LayerNormXBackprop) *Three outputs, including: * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormBetaGammaBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -811,6 +850,9 @@ instruction . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator GroupNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(GroupNorm) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -862,6 +904,9 @@ Specifies the variance of "x" . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator InstanceNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(InstanceNormV2) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -914,6 +959,20 @@ REG_OP(INInferV2D) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(INInferV2D) +/** +*@brief Performs instance normalization for inference of InHost part. + +*@par Inputs:\n +* One input, including: (NC1HWC0 supported) +* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. + +*@par Attributes: +* epsilon: An optional float32, specifying the small value added to +variance to avoid dividing by zero. Defaults to "0.00001" . \n + +*@par Outputs:\n +* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. 
+*/ REG_OP(InHost) .INPUT(variance, TensorType({DT_FLOAT})) .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) @@ -921,4 +980,4 @@ REG_OP(InHost) .OP_END_FACTORY_REG(InHost) } // namespace ge -#endif //GE_OP_NN_NORM_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index ea4a5ba3..9edc469a 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -18,9 +18,9 @@ * \file nn_ops.h * \brief */ -#ifndef GE_OP_NN_OPS_H_ -#define GE_OP_NN_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ #include "nn_pooling_ops.h" -#endif // GE_OP_NN_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 5d3cd931..6615d2f5 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -18,8 +18,8 @@ * \file nn_pooling_ops.h * \brief */ -#ifndef GE_OP_NN_POOLING_OPS_H -#define GE_OP_NN_POOLING_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -31,7 +31,7 @@ namespace ge { *@par Inputs: *@li x: An NCHW tensor of type float16, float32, int8. *@par Attributes: -*@li mode: An optional int32, specifying the pooling algorithm, either "1" (max pooling) or "0" (avg pooling). Defaults to "0". +*@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0". *@li global_pooling: An optional bool. Defaults to "false". *@li window: Optional, including: *window[0]: An optional int32, specifying the window size along in the H dimension. The value range is [1, 32768]. Defaults to "1". @@ -109,7 +109,47 @@ REG_OP(AvgPool) *@brief Performs average pooling on the input . 
\n *@par Inputs: -*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double . \n +*x: A tensor of type float16, float32, double. + +*@par Attributes: +*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 32768]. +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. +*@li padding_mode: A required string, specifying the padding algorithm, either "VALID", "SAME" and "CALCULATED". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. +*@li pads: Pad value when padding_mode is "CALCULATED". +*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default). +*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] +*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +*@li exclusive: Ignore padding area or not when calculating average. + +*@par Outputs: +*y: The average pooled output tensor. Has the same type and format as input "x". + +*@attention Constraints: +*@li Only single input and single output are supported. +*@li Global pooling is supported. +*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 32768]. ksize_H * ksize_W < 256 +*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPoolV2. 
+*/ +REG_OP(AvgPoolV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0, 0, 0, 0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .ATTR(exclusive, Bool, true) + .OP_END_FACTORY_REG(AvgPoolV2) + +/** +*@brief Performs average pooling on the input. + +*@par Inputs: +*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. *@par Attributes: *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. @@ -188,15 +228,15 @@ REG_OP(MaxPoolExt2) *@par Inputs: * One input: *x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, - * int32, int64, uint8, uint16, qint8 +* int32, int64, uint8, uint16, qint8 *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, - * specifying the size of the window for each dimension of the input tensor. - * No default value. +* specifying the size of the window for each dimension of the input tensor. +* No default value. *@li strides: A required list of int8, int16, int32, or int64 values, - * specifying the stride of the sliding window for each dimension of - * the input tensor. No default value. +* specifying the stride of the sliding window for each dimension of +* the input tensor. No default value. *@li padding: A required string. No default value. *@li data_format: An optional string. Defaults to "NHWC" . \n @@ -205,9 +245,9 @@ REG_OP(MaxPoolExt2) *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, - * ksize[1] * ksize[2] <= 255. +* ksize[1] * ksize[2] <= 255. 
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, - * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID". @@ -629,7 +669,7 @@ REG_OP(AvgPoolGrad) * @par Inputs: * @input_grad: An NHWC tensor of type float16. * @mean_matrix: Assist matrix, an NHWC tensor of type float16. -* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. \n +* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. * @par Attributes: * @li orig_input_shape: A required Original input dimensions. @@ -659,6 +699,88 @@ REG_OP(AvgPoolGradD) .ATTR(data_format, String, "NHWC") .OP_END_FACTORY_REG(AvgPoolGradD) +/** +* @brief Computes avgpoolv2grad function. + +* @par Inputs: +* @li orig_input_shape: An NHWC tensor of type int32. +* @li input_grad: An NHWC tensor of type float16, float32, or double. + +* @par Attributes: +* @li ksize: A required tuple or list, specifying the size of the window for +* each dimension of the input tensor. +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding_mode: A required string, specifying the type of +* the padding algorithm to use. +* @li global_pooling: Whether to use the global pooling. If global_pooling=true, +* ksize and pads will be ignored. Default False. +* @li ceil_mode: Whether to use the ceil function to calculate output height and +* width. Default False. +* @li exclusive: Whether to exclude padding points. default is true. +* @li data_format: An optional string. Defaults to "NHWC". + +* @par Outputs: +* @out_grad: A mutable tensor with the same shape and type as "orig_input". + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. 
+*/ +REG_OP(AvgPoolV2Grad) + .INPUT(orig_input_shape, TensorType({DT_INT32})) + .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0,0,0,0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .ATTR(exclusive, Bool, true) + .OP_END_FACTORY_REG(AvgPoolV2Grad) +/** +* @brief Computes gradients of averagev2 pooling function. + +* @par Inputs: +* @li input_grad: An NHWC tensor of type float16, float32, or double. + +* @par Attributes: +* @li orig_input_shape: A required tuple or list of type int32. +* @li ksize: A required tuple or list, specifying the size of the window for +* each dimension of the input tensor. +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding_mode: A required string, specifying the type of +* the padding algorithm to use. +* @li global_pooling: Whether to use the global pooling. If global_pooling=true, +* ksize and pads will be ignored. Default False. +* @li ceil_mode: Whether to use the ceil function to calculate output height and +* width. Default False. +* @li exclusive: Whether to exclude padding points. default is true. +* @li data_format: An optional string. Defaults to "NHWC". + +* @par Outputs: +* @out_grad: A mutable tensor with the same shape and type as "orig_input". + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. 
+*/ +REG_OP(AvgPoolV2GradD) + .INPUT(input_grad, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(mean_matrix, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(kernel_matrix, TensorType({DT_FLOAT16})) + .OUTPUT(out_grad, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .ATTR(padding_mode, String, "CALCULATED") + .ATTR(pads, ListInt, {0,0,0,0}) + .ATTR(data_format, String, "NCHW") + .ATTR(global_pooling, Bool, false) + .ATTR(ceil_mode, Bool, false) + .ATTR(exclusive, Bool, true) + .OP_END_FACTORY_REG(AvgPoolV2GradD) /** *@brief :upsample the layer @@ -1070,4 +1192,4 @@ REG_OP(MaxPoolGradWithArgmaxV2) .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV2) } // namespace ge -#endif // GE_OP_NN_POOLING_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 4f51a82e..047fd6da 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -18,8 +18,8 @@ * \file nn_training_ops.h * \brief */ -#ifndef GE_OP_TRAINING_OPS_H -#define GE_OP_TRAINING_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -2031,6 +2031,9 @@ REG_OP(ApplyAdadeltaD) * Two outputs, including: * @li var: A mutable Tensor has the same type as "var". * @li accum: A mutable Tensor has the same type as "var". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulApplyMomentum) .INPUT(var, TensorType::NumberType()) @@ -2079,6 +2082,9 @@ REG_OP(FusedMulApplyMomentum) * @li var: A Tensor has the type float32. * @li var_copy: A Tensor has the type float16. * @li accum: A Tensor has the same type as input "accum". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(FusedMulApplyMomentumExtern) .INPUT(var, TensorType(DT_FLOAT)) @@ -2581,10 +2587,12 @@ REG_OP(SparseApplyAdadeltaD) *@par Attributes: * @li automic_add_mem_size: sizes of workspaces . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AtomicAddrClean) .ATTR(automic_add_mem_size, ListInt, {}) .OP_END_FACTORY_REG(AtomicAddrClean) } // namespace ge -#endif // GE_OP_TRAINING_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index 503d97b1..7834591c 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -18,8 +18,8 @@ * \file no_op.h * \brief */ -#ifndef GE_NO_OP_H_ -#define GE_NO_OP_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -38,4 +38,4 @@ REG_OP(NoOp) } // namespace ge -#endif // GE_NO_OP_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index b50b7cd1..e0e5dfc6 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -18,8 +18,8 @@ * \file nonlinear_fuc_ops.h * \brief */ -#ifndef GE_OP_NONLINEAR_FUC_OPS_H -#define GE_OP_NONLINEAR_FUC_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ #include "graph/operator_reg.h" @@ -642,4 +642,4 @@ REG_OP(Mish) } // namespace ge -#endif // GE_OP_NONLINEAR_FUC_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index 90628af6..8d7ef9f9 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ 
-18,9 +18,8 @@ * \file npu_loss_scale_ops.h * \brief */ - -#ifndef GE_OP_NN_LOSS_SCALE_OPS_H -#define GE_OP_NN_LOSS_SCALE_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -30,6 +29,9 @@ namespace ge { *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUAllocFloatStatusOperator) .OUTPUT(data, TensorType({DT_FLOAT})) @@ -43,6 +45,9 @@ REG_OP(NPUAllocFloatStatusOperator) *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUClearFloatStatusOperator) .INPUT(addr, TensorType{DT_FLOAT}) @@ -57,6 +62,9 @@ REG_OP(NPUClearFloatStatusOperator) *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUGetFloatStatusOperator) .INPUT(addr, TensorType{DT_FLOAT}) @@ -68,6 +76,9 @@ REG_OP(NPUGetFloatStatusOperator) *@par Outputs: *y: A Tensor of type int32, output eight numbers with a value of zero. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUAllocFloatStatus) .OUTPUT(data, TensorType({DT_FLOAT})) @@ -81,6 +92,9 @@ REG_OP(NPUAllocFloatStatus) *@par Outputs: *data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUClearFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) @@ -95,6 +109,9 @@ REG_OP(NPUClearFloatStatus) *@par Outputs: *data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(NPUGetFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) @@ -102,4 +119,4 @@ REG_OP(NPUGetFloatStatus) .OP_END_FACTORY_REG(NPUGetFloatStatus) } // namespace ge -#endif // GE_OP_NN_LOSS_SCALE_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h index 139e4880..e0b783bc 100644 --- a/third_party/fwkacllib/inc/ops/outfeed_ops.h +++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h @@ -18,10 +18,10 @@ * \file outfeed_ops.h * \brief */ -#ifndef GE_OP_OUTFEED_OPS_H -#define GE_OP_OUTFEED_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ #include "data_flow_ops.h" -#endif // GE_OP_OUTFEED_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 5938941a..625c1e09 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -18,8 +18,8 @@ * \file pad_ops.h * \brief */ -#ifndef GE_OP_PAD_OPS_H -#define GE_OP_PAD_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -185,6 +185,76 @@ REG_OP(PadD) .REQUIRED_ATTR(paddings, ListListInt) .OP_END_FACTORY_REG(PadD) +/** +*@brief Pads a tensor. + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. +* @li constant_values: A optional Tensor of int32 or int64 + +*@par Attributes: +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. 
+* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad. +*/ +REG_OP(PadV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(constant_values, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3) + +/** +*@brief Pads a tensor. + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. + +*@par Attributes: +* @li paddings: A required "vector>". +* For each dimension D of input, paddings[D, 0] indicates how many +* values to add before the contents of tensor in that dimension, +* and paddings[D, 1] indicates how many values to add after the +* contents of tensor in that dimension. +* @li constant_values: An optional int value for pad. +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead.
+*/ +REG_OP(PadV3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(paddings, ListListInt) + .ATTR(constant_values, Int, 0) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3D) + /** *@brief Create a diagonal tensor @@ -258,6 +328,9 @@ REG_OP(AscendPadding) /** *@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please do not use. \n + *@par Inputs: *One input, include: *addr_table: Tensor which last dimension must be 3. For example: [8, 3]. @@ -278,4 +351,4 @@ REG_OP(EmbeddingRankId) } // namespace ge -#endif //GE_OP_PAD_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index b3c50654..5c7adfd8 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -18,8 +18,8 @@ * \file parsing_ops.h * \brief */ -#ifndef GE_OP_PARSING_OPS_H -#define GE_OP_PARSING_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -53,4 +53,4 @@ REG_OP(StringToNumber) } // namespace ge -#endif // GE_OP_PARSING_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 31ba266b..b53cfeb6 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -18,8 +18,8 @@ * \file quantize_ops.h * \brief */ -#ifndef GE_OP_QUANTIZE_OPS_H -#define GE_OP_QUANTIZE_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@
-221,4 +221,4 @@ REG_OP(AscendRequantS16) } // namespace ge -#endif // GE_OP_QUANTIZE_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 4c62ec86..9b31aa8e 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -18,8 +18,8 @@ * \file ragged_array_ops.h * \brief */ -#ifndef GE_OP_RAGGED_ARRAY_OPS_H -#define GE_OP_RAGGED_ARRAY_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -62,4 +62,4 @@ REG_OP(RaggedGather) } // namespace ge -#endif //GE_OP_RAGGED_ARRAY_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index cd6cfdfe..13488a25 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -18,8 +18,8 @@ * \file ragged_conversion_ops.h * \brief */ -#ifndef GE_OP_RAGGED_CONVERSION_OPS_H -#define GE_OP_RAGGED_CONVERSION_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -95,4 +95,4 @@ REG_OP(RaggedTensorToTensor) } // namespace ge -#endif // GE_OP_RAGGED_CONVERSION_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index ab871b7e..8af4f867 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -18,8 +18,8 @@ * \file 
ragged_math_ops.h * \brief */ -#ifndef GE_OP_RAGGED_MATH_OPS_H -#define GE_OP_RAGGED_MATH_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -57,4 +57,4 @@ REG_OP(RaggedRange) } // namespace ge -#endif //GE_OP_RAGGED_MATH_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index edec232d..b46da435 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -18,8 +18,8 @@ * \file random_ops.h * \brief */ -#ifndef GE_OP_RANDOM_OPS_H_ -#define GE_OP_RANDOM_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ #include @@ -32,7 +32,7 @@ namespace ge { *@par Inputs: *Inputs include: -* @li logits: A Tensor. Must be one of the following types: float32, float64,double. +* @li logits: A Tensor. Must be one of the following types: float16, float, double. 2-D Tensor with shape [batch_size, num_classes]. * @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n @@ -411,6 +411,25 @@ REG_OP(LinSpace) .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) .OP_END_FACTORY_REG(LinSpace) + + +/** +*@brief The dropout operator randomly sets (according to the given dropout probability) +*the outputs of some units to zero, while others are remain unchanged. . \n + +*@par Inputs: +*One input, including: +*@li x:The input tensor variable. The data type is float32. \n + +*@par Attributes: +*@li dropout_ratio:Float between 0 and 1. Fraction of the input units to drop.Defaults to "0.5". +*@li scale_train: Bool,default to true. +*@li alpha: An optional float32. A scaling factor. Defaults to "1.0". +*@li beta: An optional float32. An exponent. 
Defaults to "0.0". \n + +*@par Outputs: +*y: A Variable holding Tensor representing the dropout, has same shape and data type with x. \n +*/ REG_OP(Dropout) .INPUT(x, TensorType{DT_FLOAT}) .OUTPUT(y, TensorType{DT_FLOAT}) @@ -478,4 +497,4 @@ REG_OP(ShuffleChannel) .OP_END_FACTORY_REG(ShuffleChannel) } // namespace ge -#endif // GE_OP_RANDOM_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 7a239732..6f44093e 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -18,8 +18,8 @@ * \file reduce_ops.h * \brief */ -#ifndef GE_OP_REDUCE_OPS_H -#define GE_OP_REDUCE_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ #include "graph/operator_reg.h" @@ -502,7 +502,7 @@ REG_OP(ReduceMean) *@par Inputs: *One input: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n +* @li x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Attributes: *@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. @@ -521,8 +521,8 @@ REG_OP(ReduceMean) * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. 
*/ REG_OP(ReduceMeanD) - .INPUT(x, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(axes, ListInt) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceMeanD) @@ -984,4 +984,4 @@ REG_OP(GNTrainingUpdate) } //namespace ge -#endif /* GE_OP_REDUCE_OPS_H */ +#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index a4d54088..1b60d42a 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -18,14 +18,29 @@ * \file resource_variable_ops.h * \brief */ -#ifndef GE_OP_RESOURCE_VARIABLE_OPS_H -#define GE_OP_RESOURCE_VARIABLE_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" namespace ge { +/** +*@brief Creates a handle to a Variable resource. \n + +*@par Outputs: +*y:A Tensor of type resource. \n + +*@par Attributes: +* @li container: optional, string. +* @li shared_name: optional, string. +* @li dtype: required, type. +* @li shape: optional, ListInt. \n + +*@see VarHandleOp. +*/ + REG_OP(VarHandleOp) .ATTR(container, String, "") .ATTR(shared_name, String, "") @@ -34,6 +49,19 @@ REG_OP(VarHandleOp) .OUTPUT(y, TensorType({DT_RESOURCE})) .OP_END_FACTORY_REG(VarHandleOp) +/** +*@brief Assigns a new value to a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value to set the new tensor to use. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignVariableOp. 
+*/ + REG_OP(AssignVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ @@ -41,6 +69,19 @@ REG_OP(AssignVariableOp) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(AssignVariableOp) +/** +*@brief Adds a value to the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be incremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignAddVariableOp. +*/ + REG_OP(AssignAddVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ @@ -48,6 +89,19 @@ REG_OP(AssignAddVariableOp) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(AssignAddVariableOp) +/** +*@brief Subtracts a value to the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be incremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignSubVariableOp. +*/ + REG_OP(AssignSubVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ @@ -57,4 +111,4 @@ REG_OP(AssignSubVariableOp) } // namespace ge -#endif //GE_OP_RESOURCE_VARIABLE_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 77437aba..4010707b 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -18,8 +18,8 @@ * \file rnn.h * \brief */ -#ifndef GE_OP_RNN_H -#define GE_OP_RNN_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ #include "graph/operator_reg.h" @@ -81,6 +81,9 @@ REG_OP(BasicLSTMCell) *@par Outputs: *output_h:A Tensor of output. 
Must be the type float32. The format must be FRACTAL_Z. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DynamicLSTM) .INPUT(x, TensorType({DT_FLOAT32})) @@ -109,8 +112,8 @@ REG_OP(DynamicLSTM) *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li seq_length:A 1D Tensor. Must be one of the following types: int32. +*@li mask:A 1D Tensor. Must be one of the following types: int8. *@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -135,6 +138,9 @@ REG_OP(DynamicLSTM) *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
*/ REG_OP(DynamicRNNGrad) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -306,6 +312,9 @@ REG_OP(LSTMInputGrad) *two outputs: *@li dxt:A 4D Tensor. Must be one of the following types: float16, float32. *@li dht:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicLSTMCellInputGrad) .INPUT(dgate, TensorType({DT_FLOAT16})) @@ -328,6 +337,9 @@ REG_OP(BasicLSTMCellInputGrad) *two outputs: *@li dw:A 4D Tensor. Must be one of the following types: float16. *@li db:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicLSTMCellWeightGrad) .INPUT(x, TensorType({DT_FLOAT16})) @@ -358,6 +370,9 @@ REG_OP(BasicLSTMCellWeightGrad) *two outputs: *@li dgate:A 4D Tensor. Must be one of the following types: float16. *@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicLSTMCellCStateGrad) .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -439,6 +454,9 @@ REG_OP(RNN) *two outputs: *@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicRNNCell) .INPUT(x, TensorType({DT_FLOAT16})) @@ -460,13 +478,13 @@ REG_OP(BasicRNNCell) *@brief: DynamicGRU calculation. *@par Inputs: *seven inputs: \n -*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. -*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. 
-*@li cw:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li cb:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li b:Must be one of the following types: float16, float32. The format must be ND. +*@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li cb:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. @@ -480,11 +498,11 @@ REG_OP(BasicRNNCell) *@par Outputs: *five outputs: \n -*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li r:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li n:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
+*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -495,7 +513,7 @@ REG_OP(DynamicGRU) .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(cw, TensorType({DT_FLOAT16})) .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -516,13 +534,13 @@ REG_OP(DynamicGRU) *@brief: DynamicGRUV2 calculation. *@par Inputs: *seven inputs: \n -*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. -*@li weight_input:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li bias_input:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li bias_hidden:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_input:Must be one of the following types: float16, float32. 
The format must be ND. +*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. @@ -538,12 +556,12 @@ REG_OP(DynamicGRU) *@par Outputs: *six outputs: \n -*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
@@ -554,7 +572,7 @@ REG_OP(DynamicGRUV2) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -575,4 +593,4 @@ REG_OP(DynamicGRUV2) .OP_END_FACTORY_REG(DynamicGRUV2) } // namespace ge -#endif // GE_OP_RNN_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index 39583293..b7649a44 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -18,8 +18,8 @@ * \file rpn_ops.h * \brief */ -#ifndef GE_OP_RPN_OPS_H -#define GE_OP_RPN_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -58,4 +58,4 @@ REG_OP(NMSWithMask) .OP_END_FACTORY_REG(NMSWithMask) } // namespace ge -#endif // GE_OP_TRAINING_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 159e7382..0ce473b7 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -18,8 +18,8 @@ * \file save_ops.h * \brief */ -#ifndef GE_OP_SAVE_OPS_H_ -#define GE_OP_SAVE_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ #include "graph/operator_reg.h" @@ -39,4 +39,4 @@ REG_OP(Save) } // namespace ge -#endif // GE_OP_SAVE_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index dc6852d4..cbd9839d 100644 --- 
a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -18,8 +18,8 @@ * \file sdca_ops.h * \brief */ -#ifndef GE_OP_SDCA_OPS_H -#define GE_OP_SDCA_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -89,4 +89,4 @@ REG_OP(SdcaOptimizerV2) } // namespace ge -#endif //GE_OP_SDCA_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index d17e8e94..7c4802a6 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -18,8 +18,8 @@ * \file selection_ops.h * \brief */ -#ifndef GE_OP_SELECTION_OPS_H -#define GE_OP_SELECTION_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -186,7 +186,8 @@ REG_OP(GatherNd) * uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, * uint16, complex128, float16, uint32, uint64, complex64, complex128. * @li indices: A Tensor of type int32 or int64. -* @li axis: A Tensor of type as int32 . \n +* @li axis: A Tensor of type as int32 or int64, +* Must be in the range [-rank(input_tensor), rank(input_tensor)) . \n *@par Outputs: *y: A Tensor. Has the same type as "x" . \n @@ -920,6 +921,9 @@ REG_OP(ScatterNd) *@li "y" has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNd. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use ScatterNd instead. */ REG_OP(ScatterNdD) .INPUT(indices, TensorType::IndexNumberType()) @@ -1163,6 +1167,9 @@ REG_OP(Cumprod) *y: A Tensor. Has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator Cumprod. 
+ +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Cumprod instead. */ REG_OP(CumprodD) .INPUT(x, TensorType::NumberType()) @@ -1217,6 +1224,9 @@ REG_OP(Cumsum) *y: A Tensor. Has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator Cumsum. + +* @par Restrictions: +* Warning: THIS FUNCTION IS DEPRECATED. Please use Cumsum instead. */ REG_OP(CumsumD) .INPUT(x, TensorType::NumberType()) @@ -1787,6 +1797,9 @@ REG_OP(TileWithAxis) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ReadSelect) .INPUT(x, TensorType::ALL()) @@ -1802,6 +1815,9 @@ REG_OP(ReadSelect) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(WriteSelect) .INPUT(x, TensorType::ALL()) @@ -1907,4 +1923,4 @@ REG_OP(CumulativeLogsumexpD) .OP_END_FACTORY_REG(CumulativeLogsumexpD) } // namespace ge -#endif // GE_OP_SELECTION_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index 18df6edf..1d02fa15 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -18,8 +18,8 @@ * \file set_ops.h * \brief */ -#ifndef GE_OP_SET_OPS_H_ -#define GE_OP_SET_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -178,4 +178,4 @@ REG_OP(SetSize) .OP_END_FACTORY_REG(SetSize) } // namespace ge -#endif // GE_OP_SET_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index 3eecbeab..d7512790 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -18,8 
+18,8 @@ * \file sparse_ops.h * \brief */ -#ifndef GE_OP_SPARSE_OPS_H_ -#define GE_OP_SPARSE_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ #include "graph/operator_reg.h" @@ -1044,4 +1044,4 @@ REG_OP(DeserializeManySparse) .OP_END_FACTORY_REG(DeserializeManySparse) } // namespace ge -#endif // GE_OP_SPARSE_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index 460dada4..64fa7814 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -18,8 +18,8 @@ * \file spectral_ops.h * \brief */ -#ifndef GE_OP_SPECTRAL_OPS_H -#define GE_OP_SPECTRAL_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -49,4 +49,4 @@ REG_OP(RFFT) } // namespace ge -#endif //GE_OP_SPECTRAL_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index b0bd14c0..efe4715d 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -18,8 +18,8 @@ * \file split_combination_ops.h * \brief */ -#ifndef GE_OP_SPLIT_COMBINATION_OPS_H -#define GE_OP_SPLIT_COMBINATION_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ #include "graph/operator_reg.h" namespace ge { @@ -386,4 +386,4 @@ REG_OP(ConcatOffsetD) .OP_END_FACTORY_REG(ConcatOffsetD) } // namespace ge -#endif // GE_OP_SPLIT_COMBINATION_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/state_ops.h 
b/third_party/fwkacllib/inc/ops/state_ops.h index ca85067b..db1f5353 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -18,8 +18,8 @@ * \file state_ops.h * \brief */ -#ifndef GE_OP_STATE_OPS_H_ -#define GE_OP_STATE_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ #include "graph/operator_reg.h" @@ -164,4 +164,4 @@ REG_OP(CountUpTo) } // namespace ge -#endif // GE_OP_STATE_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index 779e7cea..366112d6 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -18,8 +18,8 @@ * \file stateful_random_ops.h * \brief */ -#ifndef GE_OP_STATEFUL_RANDOM_OPS_H -#define GE_OP_STATEFUL_RANDOM_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -233,4 +233,4 @@ REG_OP(StatefulUniformInt) } // namespace ge -#endif //GE_OP_STATELESS_RANDOM_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index d91bc38a..dad3c379 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -18,8 +18,8 @@ * \file stateless_random_ops.h * \brief */ -#ifndef GE_OP_STATELESS_RANDOM_OPS_H -#define GE_OP_STATELESS_RANDOM_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -81,4 +81,4 @@ REG_OP(StatelessRandomUniformInt) } // namespace ge 
-#endif //GE_OP_STATELESS_RANDOM_OPS_H \ No newline at end of file +#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index 90ee700d..4a88bc79 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -18,8 +18,8 @@ * \file string_ops.h * \brief */ -#ifndef GE_OP_STRING_OPS_H_ -#define GE_OP_STRING_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ #include #include "graph/operator_reg.h" @@ -559,4 +559,4 @@ REG_OP(DecodeBase64) .OP_END_FACTORY_REG(DecodeBase64) } // namespace ge -#endif // GE_OP_STRING_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h index fb25c741..a1bf4f8b 100644 --- a/third_party/fwkacllib/inc/ops/swap_co_ops.h +++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h @@ -18,8 +18,8 @@ * \file swap_co_ops.h * \brief */ -#ifndef GE_OP_SWAP_CO_OPS_H_ -#define GE_OP_SWAP_CO_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ #include "graph/operator_reg.h" @@ -59,4 +59,4 @@ REG_OP(SwapCo) } // namespace ge -#endif // GE_OP_SWAP_CO_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 5414f122..5d77c75d 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -18,8 +18,8 @@ * \file transformation_ops.h * \brief */ -#ifndef GE_OP_TRANSFORMATION_OPS_H -#define GE_OP_TRANSFORMATION_OPS_H +#ifndef OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ #include "graph/operator_reg.h" @@ -625,6 +625,9 @@ 
REG_OP(ConfusionTransposeD) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ConfusionTranspose) .INPUT(x, TensorType::BasicType()) @@ -661,11 +664,6 @@ REG_OP(FlattenV2) .ATTR(end_axis, Int, -1) .OP_END_FACTORY_REG(FlattenV2) -REG_OP(DeConvTrans) - .INPUT(x, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_INT8})) - .OP_END_FACTORY_REG(DeConvTrans) - /** *@brief Compress large weight to small one. Usually inserted before Conv2d. * @@ -713,4 +711,4 @@ REG_OP(CompressFcOp) .OP_END_FACTORY_REG(CompressFcOp) } // namespace ge -#endif // GE_OP_TRANSFORMATION_OPS_H +#endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index 2f014937..e19cbd7c 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -18,8 +18,8 @@ * \file warp_perspective_ops.h * \brief */ -#ifndef GE_OP_WARP_PERSPECTIVE_OPS_H_ -#define GE_OP_WARP_PERSPECTIVE_OPS_H_ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -28,6 +28,9 @@ namespace ge { /** *@brief Applies a perspective transformation to an image . \n +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please do not use. \n + *@par Inputs: *@li x: input tensor, format NCHW, type must be float. *@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . 
\n @@ -53,4 +56,4 @@ REG_OP(WarpPerspective) .OP_END_FACTORY_REG(WarpPerspective) } // namespace ge -#endif // GE_OP_WARP_PERSPECTIVE_OPS_H_ +#endif // OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h index 5fed8960..2c479e92 100644 --- a/third_party/fwkacllib/inc/register/op_kernel_registry.h +++ b/third_party/fwkacllib/inc/register/op_kernel_registry.h @@ -41,7 +41,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { private: OpKernelRegistry(); class OpKernelRegistryImpl; - /*lint -e148*/ std::unique_ptr impl_; }; } // namespace ge diff --git a/third_party/fwkacllib/inc/register/ops_kernel_builder_registry.h b/third_party/fwkacllib/inc/register/ops_kernel_builder_registry.h new file mode 100644 index 00000000..96ac931b --- /dev/null +++ b/third_party/fwkacllib/inc/register/ops_kernel_builder_registry.h @@ -0,0 +1,62 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_OPS_KERNEL_BUILDER_REGISTRY_H_ +#define INC_REGISTER_OPS_KERNEL_BUILDER_REGISTRY_H_ + +#include +#include "register/register_types.h" +#include "common/opskernel/ops_kernel_builder.h" + +namespace ge { +using OpsKernelBuilderPtr = std::shared_ptr; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpsKernelBuilderRegistry { + public: + static OpsKernelBuilderRegistry &GetInstance(); + + void Register(const std::string &lib_name, const OpsKernelBuilderPtr &instance); + + void UnregisterAll(); + + const std::map &GetAll() const; + + private: + std::map kernel_builders_; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpsKernelBuilderRegistrar { + public: + using CreateFn = OpsKernelBuilder *(*)(); + OpsKernelBuilderRegistrar(const std::string &kernel_lib_name, CreateFn fn); + ~OpsKernelBuilderRegistrar() = default; +}; + +#define REGISTER_OPS_KERNEL_BUILDER(kernel_lib_name, builder) \ + REGISTER_OPS_KERNEL_BUILDER_UNIQ_HELPER(__COUNTER__, kernel_lib_name, builder) + +#define REGISTER_OPS_KERNEL_BUILDER_UNIQ_HELPER(ctr, kernel_lib_name, builder) \ + REGISTER_OPS_KERNEL_BUILDER_UNIQ(ctr, kernel_lib_name, builder) + +#define REGISTER_OPS_KERNEL_BUILDER_UNIQ(ctr, kernel_lib_name, builder) \ + static ::ge::OpsKernelBuilderRegistrar register_op_kernel_builder_##ctr \ + __attribute__((unused)) = \ + ::ge::OpsKernelBuilderRegistrar(kernel_lib_name, []()->::ge::OpsKernelBuilder* { \ + return new (std::nothrow) builder(); \ + }) +} // namespace ge + +#endif // INC_REGISTER_OPS_KERNEL_BUILDER_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 4b08916e..ea32c164 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -19,7 +19,7 @@ #include -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -580,7 +580,18 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t 
labelNumber, void *d * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); -#ifdef __cplusplus + +/** + * @ingroup dvrt_base + * @brief get current thread last stream id and task id + * @param [out] stream id and task id + * @param [in] null + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for input null ptr + */ +RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskid, uint32_t *streamid); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index c64ed16f..6de84c02 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -185,7 +185,7 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); */ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index cc74a5ed..21296ca2 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -149,7 +149,14 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint */ RTS_API rtError_t rtGetGroupCount(uint32_t *count); -#ifdef __cplusplus +/** + * @ingroup rt_context + * @brief set context INF mode + * @param [in] mode + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtSetCtxINFMode(bool mode); +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git 
a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 048be69a..dddb1e10 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -339,7 +339,24 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); -#ifdef __cplusplus + +/** + * @ingroup dvrt_dev + * @brief set target device for current thread + * @param [int] device the device id + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); + +/** + * @ingroup dvrt_dev + * @brief reset all opened device + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 60f400b3..e27cd832 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile(); */ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 9dc44766..af7b16d8 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -19,7 +19,7 @@ #include 
"base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -229,7 +229,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs */ RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 956e033b..2030634a 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -20,7 +20,7 @@ #include "base.h" #include "stream.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -529,7 +529,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 8e159dd7..0d9e20ce 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -17,14 +17,12 @@ #ifndef __CCE_RUNTIME_MEM_H__ #define __CCE_RUNTIME_MEM_H__ -/*lint -e7*/ #include -/*lint +e7*/ #include "base.h" #include "config.h" #include "stream.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -159,7 +157,12 @@ typedef struct rtAiCoreMemorySize { * @ingroup dvrt_mem * @brief memory type */ -typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t; +typedef enum tagRtMemoryType { + RT_MEMORY_TYPE_HOST = 1, + RT_MEMORY_TYPE_DEVICE = 2 , + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 +} rtMemoryType_t; /** * @ingroup dvrt_mem @@ -167,11 +170,33 @@ typedef 
enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = */ typedef struct tagRtPointerAttributes { rtMemoryType_t memoryType; // host memory or device memory + rtMemoryType_t locationType; uint32_t deviceID; // device ID - uint32_t isManaged; uint32_t pageSize; } rtPointerAttributes_t; + +typedef struct rtMallocHostSharedMemoryIn { + const char* name; + const uint64_t size; + uint32_t flag; +} rtMallocHostSharedMemoryIn; + +typedef struct rtMallocHostSharedMemoryOut { + int fd; + void* ptr; + void* devPtr; +} rtMallocHostSharedMemoryOut; + +typedef struct rtFreeHostSharedMemoryIn { + const char* name; + const uint64_t size; + int fd; + void* ptr; + void* devPtr; +} rtFreeHostSharedMemoryIn; + + /** * @ingroup dvrt_mem * @brief alloc device memory @@ -230,6 +255,28 @@ RTS_API rtError_t rtMallocHost(void **hostPtr, uint64_t size); */ RTS_API rtError_t rtFreeHost(void *hostPtr); +/** + * @ingroup dvrt_mem + * @brief alloc host shared memory + * @param [in] in alloc host shared memory inputPara pointer + * @param [in] out alloc host shared memory outputInfo pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ + +RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, + rtMallocHostSharedMemoryOut *out); + +/** + * @ingroup dvrt_mem + * @brief free host memory + * @param [in] in free host shared memory inputPara pointer + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ + +RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in); + /** * @ingroup dvrt_mem * @brief alloc managed memory @@ -486,7 +533,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); */ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h 
b/third_party/fwkacllib/inc/runtime/rt_model.h index 089a90b7..59a1ba7d 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -430,7 +430,7 @@ rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, uint */ RTS_API rtError_t rtDebugUnRegister(rtModel_t model); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 3123c3a9..ab542d89 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -20,7 +20,7 @@ #include "base.h" #include "event.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -188,7 +188,7 @@ RTS_API rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream); */ RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index 87ae8f75..185d2b9c 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -100,6 +100,8 @@ enum { TDT_TSD_SEND_HEARTBEAT_FAILED_CODE, TDT_TSD_CLEAN_RESOURCE_FAILED_CODE, TDT_TSD_SEND_MSG_FAILED_CODE, + TDT_TSD_AICPU_SD_PROCESS_ABNORMAL_CODE, + TDT_TSD_CUSTOM_PROCESS_ABNORMAL_CODE, TDT_PPC_DRIVER_INIT_FAIL_CODE, TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE, TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE, @@ -510,6 +512,8 @@ TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_HDCSERVER_FAILED, " TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, 
TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_AICPU_SD_PROCESS_ABNORMAL, "aicpu_sd process abnormal"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_INFO, TDT_TSD_CUSTOM_PROCESS_ABNORMAL, "custom_aicpu_sd process abnormal"); /********************* PPC ****************************/ // create PPC error level error diff --git a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h index 0e62a85c..1cab6fd1 100644 --- a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h +++ b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h @@ -135,6 +135,93 @@ int32_t TdtHostPopData(const std::string &channelName, std::vector &it * @li tdt_host_interface.h: Header file where the interface declaration is located. */ int32_t TdtHostStop(const std::string &channelName); + +/** +* @ingroup TdtInFeedInit +* @brief Initialize the interface, start and initialize various general thread, log and other services +* +* @par Function +* Initialize the interface, start and initialize various general thread, log and other services +* +* @param deviceId [IN] type #unsigned int. logic device ID +* @retval #0 Success +* @retval #Not 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtInFeedInit(uint32_t deviceId); + +/** +* @ingroup TdtOutFeedInit +* @brief Initialize the interface, start and initialize various general thread, log and other services +* +* @par Function +* Initialize the interface, start and initialize various general thread, log and other services +* +* @param deviceId [IN] type #unsigned int. 
logic device ID +* @retval #0 Success +* @retval #Not 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtOutFeedInit(uint32_t deviceId); + +/** +* @ingroup TdtInFeedDestroy +* @brief Notify TDT component to close related resources +* +* @par Function +* Notify TDT component to close related resources +* +* @param NA +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtInFeedDestroy(uint32_t deviceId); + +/** +* @ingroup TdtOutFeedDestroy +* @brief Notify TDT component to close related resources +* +* @par Function +* Notify TDT component to close related resources +* +* @param NA +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ +int32_t TdtOutFeedDestroy(); + +/** +* @ingroup TdtInFeedData +* @brief Blocking queue. When the queue is full, the Push interface will block. +* +* @par Function +* Blocking queue. When the queue is full, the Push interface will block. +* +* @param channelName [IN] type #String. queue channel name +* @param items [IN] type #vector DataItem is defined in data_common.h. input data +* @retval 0 Success +* @retval OtherValues 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. 
+* @li data_common.h: Header file where 'DataItem' defined +*/ +int32_t TdtInFeedData(const std::string &channelName, const std::vector &item, uint32_t deviceId); } // namespace tdt #ifdef __cplusplus } diff --git a/inc/common/util/ai_core/param_calculate/aicore_param_calculator.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h similarity index 51% rename from inc/common/util/ai_core/param_calculate/aicore_param_calculator.h rename to third_party/fwkacllib/inc/toolchain/adx_datadump_server.h index c0c378fd..67adecd9 100644 --- a/inc/common/util/ai_core/param_calculate/aicore_param_calculator.h +++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h @@ -14,20 +14,29 @@ * limitations under the License. */ -#ifndef AICORE_PARAM_CALCULATOR -#define AICORE_PARAM_CALCULATOR - -#include "graph/node.h" -#include "graph_optimizer/graph_optimize_register_error_codes.h" +#ifndef ADX_DATADUMP_SERVER_H +#define ADX_DATADUMP_SERVER_H +#ifdef __cplusplus +extern "C" { +#endif +/** + * @brief initialize server for normal datadump function. + * @return + * IDE_DAEMON_OK: datadump server init success + * IDE_DAEMON_ERROR: datadump server init failed + */ +int AdxDataDumpServerInit(); -namespace fe { -class AICoreParamCalculator { - public: - AICoreParamCalculator(); +/** + * @brief uninitialize server for normal datadump function. 
+ * @return + * IDE_DAEMON_OK: datadump server uninit success + * IDE_DAEMON_ERROR: datadump server uninit failed + */ +int AdxDataDumpServerUnInit(); - ~AICoreParamCalculator(); +#ifdef __cplusplus +} +#endif +#endif - Status CalcOpRunningParam(ge::Node &node); -}; -} // namespace fe -#endif // AICORE_PARAM_CALCULATOR diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index 4f216239..c8715041 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -152,4 +152,13 @@ MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); */ MSVP_PROF_API int32_t ProfFinalize(); +/** + * @name ProfGetDataTypeConfig + * @brief get dataTypeConfig started with of one device + * @param deviceId [IN] deviceId to get dataTypeConfig + * @param dataTypeConfig [OUT] result get + * @return ProfErrorCode + */ +MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); + #endif // MSPROF_ENGINE_PROF_ACL_API_H_ diff --git a/third_party/patch/securec/securec.patch001 b/third_party/patch/securec/securec.patch001 index 666f28ce..01c2d769 100644 --- a/third_party/patch/securec/securec.patch001 +++ b/third_party/patch/securec/securec.patch001 @@ -1,5 +1,5 @@ -diff -Npur -x .git bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt ---- bounds_checking_function/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +diff -Npur -x .git libboundscheck/CMakeLists.txt securec/CMakeLists.txt +--- libboundscheck/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +++ securec/CMakeLists.txt 2020-09-19 16:53:48.689460700 +0800 @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.14)