| @@ -88,4 +88,8 @@ void OpTilingManager::LoadSo() { | |||||
| } | } | ||||
| } | } | ||||
| OpTilingManager &OpTilingManager::GetInstance() { | |||||
| static OpTilingManager instance; | |||||
| return instance; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -25,6 +25,7 @@ using SoToHandleMap = std::map<std::string, void *>; | |||||
| class OpTilingManager { | class OpTilingManager { | ||||
| public: | public: | ||||
| OpTilingManager() = default; | OpTilingManager() = default; | ||||
| static OpTilingManager &GetInstance(); | |||||
| ~OpTilingManager(); | ~OpTilingManager(); | ||||
| void LoadSo(); | void LoadSo(); | ||||
| @@ -72,7 +72,89 @@ set(SRC_LIST | |||||
| "../single_op/task/tbe_task_builder.cc" | "../single_op/task/tbe_task_builder.cc" | ||||
| "../single_op/task/aicpu_task_builder.cc" | "../single_op/task/aicpu_task_builder.cc" | ||||
| "../single_op/task/aicpu_kernel_task_builder.cc" | "../single_op/task/aicpu_kernel_task_builder.cc" | ||||
| "../hybrid/hybrid_davinci_model_stub.cc" | |||||
| "../hybrid/common/tensor_value.cc" | |||||
| "../hybrid/common/npu_memory_allocator.cc" | |||||
| "../hybrid/executor/rt_callback_manager.cc" | |||||
| "../hybrid/executor/node_state.cc" | |||||
| "../hybrid/executor/node_done_manager.cc" | |||||
| "../hybrid/executor/hybrid_profiler.cc" | |||||
| "../hybrid/executor/hybrid_model_executor.cc" | |||||
| "../hybrid/executor/hybrid_model_async_executor.cc" | |||||
| "../hybrid/executor/hybrid_execution_context.cc" | |||||
| "../hybrid/executor/subgraph_context.cc" | |||||
| "../hybrid/executor/subgraph_executor.cc" | |||||
| "../hybrid/executor/worker/task_compile_engine.cc" | |||||
| "../hybrid/executor/worker/shape_inference_engine.cc" | |||||
| "../hybrid/executor/worker/execution_engine.cc" | |||||
| "../hybrid/model/hybrid_model.cc" | |||||
| "../hybrid/model/hybrid_model_builder.cc" | |||||
| "../hybrid/model/node_item.cc" | |||||
| "../hybrid/model/graph_item.cc" | |||||
| "../hybrid/node_executor/aicore/aicore_node_executor.cc" | |||||
| "../hybrid/node_executor/aicore/aicore_op_task.cc" | |||||
| "../hybrid/node_executor/aicore/aicore_task_builder.cc" | |||||
| "../hybrid/node_executor/aicpu/aicpu_node_executor.cc" | |||||
| "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | |||||
| "../hybrid/node_executor/ge_local/ge_local_node_executor.cc" | |||||
| "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel_factory.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc" | |||||
| "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc" | |||||
| "../hybrid/node_executor/controlop/control_op_executor.cc" | |||||
| "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||||
| "../hybrid/node_executor/rts/rts_node_executor.cc" | |||||
| "../hybrid/node_executor/node_executor.cc" | |||||
| "../hybrid/node_executor/task_context.cc" | |||||
| "../hybrid/hybrid_davinci_model.cc" | |||||
| "../ge_local_engine/engine/host_cpu_engine.cc" | |||||
| "../graph/common/omg_util.cc" | |||||
| "../graph/manager/host_mem_manager.cc" | |||||
| "../graph/build/memory/var_mem_assign_util.cc" | |||||
| "../host_kernels/transpose_kernel.cc" | |||||
| "../host_kernels/add_kernel.cc" | |||||
| "../host_kernels/broadcast_args_kernel.cc" | |||||
| "../host_kernels/broadcast_gradient_args_kernel.cc" | |||||
| "../host_kernels/cast_kernel.cc" | |||||
| "../host_kernels/concat_offset_kernel.cc" | |||||
| "../host_kernels/concat_v2_kernel.cc" | |||||
| "../host_kernels/dynamic_stitch_kernel.cc" | |||||
| "../host_kernels/identity_kernel.cc" | |||||
| "../host_kernels/empty_kernel.cc" | |||||
| "../host_kernels/expanddims_kernel.cc" | |||||
| "../host_kernels/fill_kernel.cc" | |||||
| "../host_kernels/floordiv_kernel.cc" | |||||
| "../host_kernels/floormod_kernel.cc" | |||||
| "../host_kernels/gather_v2_kernel.cc" | |||||
| "../host_kernels/greater_kernel.cc" | |||||
| "../host_kernels/kernel_utils.cc" | |||||
| "../host_kernels/maximum_kernel.cc" | |||||
| "../host_kernels/mul_kernel.cc" | |||||
| "../host_kernels/pack_kernel.cc" | |||||
| "../host_kernels/permute_kernel.cc" | |||||
| "../host_kernels/range_kernel.cc" | |||||
| "../host_kernels/rank_kernel.cc" | |||||
| "../host_kernels/reduce_prod_kernel.cc" | |||||
| "../host_kernels/reshape_kernel.cc" | |||||
| "../host_kernels/rsqrt_kernel.cc" | |||||
| "../host_kernels/shape_kernel.cc" | |||||
| "../host_kernels/shape_n_kernel.cc" | |||||
| "../host_kernels/size_kernel.cc" | |||||
| "../host_kernels/slice_d_kernel.cc" | |||||
| "../host_kernels/slice_kernel.cc" | |||||
| "../host_kernels/squeeze_kernel.cc" | |||||
| "../host_kernels/unsqueeze_kernel.cc" | |||||
| "../host_kernels/ssd_prior_box_kernel.cc" | |||||
| "../host_kernels/strided_slice_kernel.cc" | |||||
| "../host_kernels/sub_kernel.cc" | |||||
| "../host_kernels/transdata_kernel.cc" | |||||
| "../host_kernels/unpack_kernel.cc" | |||||
| "../graph/passes/pass_utils.cc" | |||||
| "../graph/common/bcast.cc" | |||||
| "../common/fp16_t.cc" | |||||
| "../common/formats/format_transfers/format_transfer_transpose.cc" | |||||
| "../common/formats/utils/formats_trans_utils.cc" | |||||
| ) | ) | ||||
| ######## libge_executor.a ######## | ######## libge_executor.a ######## | ||||
| @@ -105,9 +187,9 @@ target_include_directories(ge_executor PRIVATE | |||||
| ${CMAKE_BINARY_DIR}/proto/ge | ${CMAKE_BINARY_DIR}/proto/ge | ||||
| #### yellow zone #### | #### yellow zone #### | ||||
| ${GE_CODE_DIR}/../inc | ${GE_CODE_DIR}/../inc | ||||
| ${GE_CODE_DIR}/../inc/cce | |||||
| ${GE_CODE_DIR}/../inc/cce | |||||
| #### blue zone #### | #### blue zone #### | ||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | ) | ||||
| target_link_libraries(ge_executor PRIVATE | target_link_libraries(ge_executor PRIVATE | ||||
| @@ -147,9 +229,9 @@ target_include_directories(ge_executor_shared PRIVATE | |||||
| ${CMAKE_BINARY_DIR}/proto/ge | ${CMAKE_BINARY_DIR}/proto/ge | ||||
| #### yellow zone #### | #### yellow zone #### | ||||
| ${GE_CODE_DIR}/../inc | ${GE_CODE_DIR}/../inc | ||||
| ${GE_CODE_DIR}/../inc/cce | |||||
| ${GE_CODE_DIR}/../inc/cce | |||||
| #### blue zone #### | #### blue zone #### | ||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ) | ) | ||||
| target_link_libraries(ge_executor_shared PRIVATE | target_link_libraries(ge_executor_shared PRIVATE | ||||
| @@ -158,7 +240,7 @@ target_link_libraries(ge_executor_shared PRIVATE | |||||
| -Wl,--no-as-needed | -Wl,--no-as-needed | ||||
| ge_common | ge_common | ||||
| runtime | runtime | ||||
| slog | |||||
| slog | |||||
| mmpa | mmpa | ||||
| graph | graph | ||||
| register | register | ||||
| @@ -39,6 +39,8 @@ | |||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "graph/load/new_model_manager/davinci_model.h" | #include "graph/load/new_model_manager/davinci_model.h" | ||||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | #include "opskernel_manager/ops_kernel_builder_manager.h" | ||||
| #include "graph/opsproto_manager.h" | |||||
| #include "ge_local_engine/engine/host_cpu_engine.h" | |||||
| using std::string; | using std::string; | ||||
| using std::vector; | using std::vector; | ||||
| @@ -221,6 +223,33 @@ class ModelListenerAdapter : public ModelListener { | |||||
| std::shared_ptr<ge::ModelListener> listener; | std::shared_ptr<ge::ModelListener> listener; | ||||
| }; | }; | ||||
| static void InitOpsProtoManger() { | |||||
| string opsproto_path; | |||||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||||
| if (path_env != nullptr) { | |||||
| string path = path_env; | |||||
| string file_path = RealPath(path.c_str()); | |||||
| if (file_path.empty()) { | |||||
| GELOGE(FAILED, "File path %s is invalid.", path.c_str()); | |||||
| return; | |||||
| } | |||||
| opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); | |||||
| GELOGI("Get opsproto so path from env : %s", path.c_str()); | |||||
| } else { | |||||
| string path_base = PluginManager::GetPath(); | |||||
| GELOGI("path_base is %s", path_base.c_str()); | |||||
| path_base = path_base.substr(0, path_base.rfind('/')); | |||||
| path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||||
| opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | |||||
| } | |||||
| GELOGI("Get opsproto path is %s", opsproto_path.c_str()); | |||||
| OpsProtoManager *manager = OpsProtoManager::Instance(); | |||||
| map<string, string> option_tmp; | |||||
| option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path)); | |||||
| (void)manager->Initialize(option_tmp); | |||||
| } | |||||
| GeExecutor::GeExecutor() {} | GeExecutor::GeExecutor() {} | ||||
| Status GeExecutor::Initialize() { | Status GeExecutor::Initialize() { | ||||
| @@ -230,6 +259,16 @@ Status GeExecutor::Initialize() { | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| OpTilingManager::GetInstance().LoadSo(); | |||||
| Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); | |||||
| if (initHostCpuEngineStatus != SUCCESS) { | |||||
| GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine"); | |||||
| return initHostCpuEngineStatus; | |||||
| } | |||||
| InitOpsProtoManger(); | |||||
| std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM); | std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM); | ||||
| mem_type.push_back(RT_MEMORY_P2P_DDR); | mem_type.push_back(RT_MEMORY_P2P_DDR); | ||||
| auto ret = MemManager::Instance().Initialize(mem_type); | auto ret = MemManager::Instance().Initialize(mem_type); | ||||
| @@ -600,10 +639,16 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
| } | } | ||||
| std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id); | |||||
| if (davinci_model != nullptr) { | |||||
| uint64_t session_id = davinci_model->GetSessionId(); | |||||
| std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id); | |||||
| if (hybrid_davinci_model != nullptr) { | |||||
| uint64_t session_id = hybrid_davinci_model->GetSessionId(); | |||||
| VarManagerPool::Instance().RemoveVarManager(session_id); | VarManagerPool::Instance().RemoveVarManager(session_id); | ||||
| } else { | |||||
| std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id); | |||||
| if (davinci_model != nullptr) { | |||||
| uint64_t session_id = davinci_model->GetSessionId(); | |||||
| VarManagerPool::Instance().RemoveVarManager(session_id); | |||||
| } | |||||
| } | } | ||||
| ret = GraphLoader::UnloadModel(model_id); | ret = GraphLoader::UnloadModel(model_id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -933,6 +978,26 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat | |||||
| */ | */ | ||||
| Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | ||||
| ge::RunModelData &run_output_data, bool async_mode) { | ge::RunModelData &run_output_data, bool async_mode) { | ||||
| std::vector<GeTensorDesc> input_desc = {}; | |||||
| std::vector<GeTensorDesc> output_desc = {}; | |||||
| return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode); | |||||
| } | |||||
| /** | |||||
| * @ingroup ge | |||||
| * @brief Synchronous execution of offline model(Do not create thread) | |||||
| * @param [in] uint32_t model_id: Model ID to execute | |||||
| void* stream: stream to execute | |||||
| const domi::InputData *input_data: Model input data | |||||
| const std::vector<GeTensorDesc> &input_desc: Description of model input data | |||||
| bool async_mode: is asynchronize mode | |||||
| * @param [out] domi::OutputData *output_data: Model output data | |||||
| * @param [out] std::vector<GeTensorDesc> &output_desc: Description of model output data | |||||
| * @return SUCCESS handle successfully / others handle failed | |||||
| */ | |||||
| Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | |||||
| const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data, | |||||
| std::vector<GeTensorDesc> &output_desc, bool async_mode) { | |||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
| return ACL_ERROR_GE_EXEC_NOT_INIT; | return ACL_ERROR_GE_EXEC_NOT_INIT; | ||||
| @@ -957,7 +1022,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||||
| } | } | ||||
| } | } | ||||
| return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data); | |||||
| return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); | |||||
| } | } | ||||
| /** | /** | ||||
| @@ -61,9 +61,91 @@ local_ge_executor_src_files := \ | |||||
| ../single_op/task/tbe_task_builder.cc \ | ../single_op/task/tbe_task_builder.cc \ | ||||
| ../single_op/task/aicpu_task_builder.cc \ | ../single_op/task/aicpu_task_builder.cc \ | ||||
| ../single_op/task/aicpu_kernel_task_builder.cc \ | ../single_op/task/aicpu_kernel_task_builder.cc \ | ||||
| ../hybrid/hybrid_davinci_model_stub.cc\ | |||||
| ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | ||||
| ../graph/common/local_context.cc \ | ../graph/common/local_context.cc \ | ||||
| ../hybrid/common/tensor_value.cc \ | |||||
| ../hybrid/common/npu_memory_allocator.cc \ | |||||
| ../hybrid/executor/rt_callback_manager.cc \ | |||||
| ../hybrid/executor/node_state.cc \ | |||||
| ../hybrid/executor/node_done_manager.cc \ | |||||
| ../hybrid/executor/hybrid_profiler.cc \ | |||||
| ../hybrid/executor/hybrid_model_executor.cc \ | |||||
| ../hybrid/executor/hybrid_model_async_executor.cc \ | |||||
| ../hybrid/executor/hybrid_execution_context.cc \ | |||||
| ../hybrid/executor/subgraph_context.cc \ | |||||
| ../hybrid/executor/subgraph_executor.cc \ | |||||
| ../hybrid/executor/worker/task_compile_engine.cc \ | |||||
| ../hybrid/executor/worker/shape_inference_engine.cc \ | |||||
| ../hybrid/executor/worker/execution_engine.cc \ | |||||
| ../hybrid/model/hybrid_model.cc \ | |||||
| ../hybrid/model/hybrid_model_builder.cc \ | |||||
| ../hybrid/model/node_item.cc \ | |||||
| ../hybrid/model/graph_item.cc \ | |||||
| ../hybrid/node_executor/aicore/aicore_node_executor.cc \ | |||||
| ../hybrid/node_executor/aicore/aicore_op_task.cc \ | |||||
| ../hybrid/node_executor/aicore/aicore_task_builder.cc \ | |||||
| ../hybrid/node_executor/aicpu/aicpu_node_executor.cc \ | |||||
| ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ | |||||
| ../hybrid/node_executor/ge_local/ge_local_node_executor.cc \ | |||||
| ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \ | |||||
| ../hybrid/node_executor/host_cpu/kernel_factory.cc \ | |||||
| ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \ | |||||
| ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | |||||
| ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | |||||
| ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | |||||
| ../hybrid/node_executor/controlop/control_op_executor.cc \ | |||||
| ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | |||||
| ../hybrid/node_executor/rts/rts_node_executor.cc \ | |||||
| ../hybrid/node_executor/node_executor.cc \ | |||||
| ../hybrid/node_executor/task_context.cc \ | |||||
| ../hybrid/hybrid_davinci_model.cc \ | |||||
| ../ge_local_engine/engine/host_cpu_engine.cc \ | |||||
| ../graph/common/omg_util.cc \ | |||||
| ../graph/manager/host_mem_manager.cc \ | |||||
| ../graph/build/memory/var_mem_assign_util.cc \ | |||||
| ../host_kernels/transpose_kernel.cc \ | |||||
| ../host_kernels/add_kernel.cc \ | |||||
| ../host_kernels/broadcast_args_kernel.cc \ | |||||
| ../host_kernels/broadcast_gradient_args_kernel.cc \ | |||||
| ../host_kernels/cast_kernel.cc \ | |||||
| ../host_kernels/concat_offset_kernel.cc \ | |||||
| ../host_kernels/concat_v2_kernel.cc \ | |||||
| ../host_kernels/dynamic_stitch_kernel.cc \ | |||||
| ../host_kernels/identity_kernel.cc \ | |||||
| ../host_kernels/empty_kernel.cc \ | |||||
| ../host_kernels/expanddims_kernel.cc \ | |||||
| ../host_kernels/fill_kernel.cc \ | |||||
| ../host_kernels/floordiv_kernel.cc \ | |||||
| ../host_kernels/floormod_kernel.cc \ | |||||
| ../host_kernels/gather_v2_kernel.cc \ | |||||
| ../host_kernels/greater_kernel.cc \ | |||||
| ../host_kernels/kernel_utils.cc \ | |||||
| ../host_kernels/maximum_kernel.cc \ | |||||
| ../host_kernels/mul_kernel.cc \ | |||||
| ../host_kernels/pack_kernel.cc \ | |||||
| ../host_kernels/permute_kernel.cc \ | |||||
| ../host_kernels/range_kernel.cc \ | |||||
| ../host_kernels/rank_kernel.cc \ | |||||
| ../host_kernels/reduce_prod_kernel.cc \ | |||||
| ../host_kernels/reshape_kernel.cc \ | |||||
| ../host_kernels/rsqrt_kernel.cc \ | |||||
| ../host_kernels/shape_kernel.cc \ | |||||
| ../host_kernels/shape_n_kernel.cc \ | |||||
| ../host_kernels/size_kernel.cc \ | |||||
| ../host_kernels/slice_d_kernel.cc \ | |||||
| ../host_kernels/slice_kernel.cc \ | |||||
| ../host_kernels/squeeze_kernel.cc \ | |||||
| ../host_kernels/unsqueeze_kernel.cc \ | |||||
| ../host_kernels/ssd_prior_box_kernel.cc \ | |||||
| ../host_kernels/strided_slice_kernel.cc \ | |||||
| ../host_kernels/sub_kernel.cc \ | |||||
| ../host_kernels/transdata_kernel.cc \ | |||||
| ../host_kernels/unpack_kernel.cc \ | |||||
| ../graph/passes/pass_utils.cc \ | |||||
| ../graph/common/bcast.cc \ | |||||
| ../common/fp16_t.cc \ | |||||
| ../common/formats/format_transfers/format_transfer_transpose.cc \ | |||||
| ../common/formats/utils/formats_trans_utils.cc \ | |||||
| local_ge_executor_c_include := \ | local_ge_executor_c_include := \ | ||||
| proto/insert_op.proto \ | proto/insert_op.proto \ | ||||
| @@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES | |||||
| ) | ) | ||||
| ############ libge_local_opskernel_builder.a ############ | ############ libge_local_opskernel_builder.a ############ | ||||
| add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||||
| add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS}) | |||||
| target_compile_options(ge_local_opskernel_builder_static PRIVATE | target_compile_options(ge_local_opskernel_builder_static PRIVATE | ||||
| -Werror | -Werror | ||||
| @@ -95,8 +95,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { | |||||
| void HostCpuEngine::CloseSo() { | void HostCpuEngine::CloseSo() { | ||||
| for (auto handle : lib_handles_) { | for (auto handle : lib_handles_) { | ||||
| if (dlclose(handle) != 0) { | |||||
| GELOGW("failed to close handle, message: %s", dlerror()); | |||||
| if (mmDlclose(handle) != 0) { | |||||
| GELOGW("failed to close handle, message: %s", mmDlerror()); | |||||
| } | } | ||||
| } | } | ||||
| lib_handles_.clear(); | lib_handles_.clear(); | ||||
| @@ -322,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector<std::string> &lib_paths) { | |||||
| Status HostCpuEngine::LoadLib(const std::string &lib_path) { | Status HostCpuEngine::LoadLib(const std::string &lib_path) { | ||||
| GELOGI("To invoke dlopen on lib: %s", lib_path.c_str()); | GELOGI("To invoke dlopen on lib: %s", lib_path.c_str()); | ||||
| auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||||
| auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||||
| if (handle == nullptr) { | if (handle == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror()); | |||||
| GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), mmDlerror()); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize"); | |||||
| auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize"); | |||||
| if (initialize != nullptr) { | if (initialize != nullptr) { | ||||
| GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str()); | GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str()); | ||||
| if (initialize(HostCpuContext()) != SUCCESS) { | if (initialize(HostCpuContext()) != SUCCESS) { | ||||
| @@ -20,7 +20,7 @@ | |||||
| #include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| #include "graph/operator.h" | #include "graph/operator.h" | ||||
| #include "register/register.h" | |||||
| #include "external/../register/register.h" | |||||
| namespace ge { | namespace ge { | ||||
| class HostCpuEngine { | class HostCpuEngine { | ||||
| @@ -13,6 +13,9 @@ set(GE_SRC_LIST | |||||
| "task/hccl_task.cc" | "task/hccl_task.cc" | ||||
| "task/memcpy_async_task.cc" | "task/memcpy_async_task.cc" | ||||
| "task/profiler_task.cc" | "task/profiler_task.cc" | ||||
| "task/label_goto_task.cc" | |||||
| "task/label_set_task.cc" | |||||
| "task/label_switch_task.cc" | |||||
| ) | ) | ||||
| add_library(ge_runtime SHARED ${GE_SRC_LIST}) | add_library(ge_runtime SHARED ${GE_SRC_LIST}) | ||||
| @@ -307,8 +307,8 @@ bool RuntimeModel::Run() { | |||||
| ret = rtStreamSynchronize(rt_model_stream_); | ret = rtStreamSynchronize(rt_model_stream_); | ||||
| if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
| if (ret == RT_ERROR_END_OF_SEQUENCE) { | |||||
| GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret); | |||||
| if (ret == ACL_ERROR_RT_END_OF_SEQUENCE) { | |||||
| GELOGI("Model stream ACL_ERROR_RT_END_OF_SEQUENCE signal received, ret = 0x%X", ret); | |||||
| return true; | return true; | ||||
| } | } | ||||
| GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); | GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include "runtime/rt_model.h" | #include "runtime/rt_model.h" | ||||
| #include "ge_runtime/model_context.h" | #include "ge_runtime/model_context.h" | ||||
| #include "ge_runtime/task_info.h" | #include "ge_runtime/task_info.h" | ||||
| #include "external/runtime/rt_error_codes.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace model_runner { | namespace model_runner { | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "model/ge_model.h" | #include "model/ge_model.h" | ||||
| #include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
| #include "opskernel_manager/ops_kernel_builder_manager.h" | #include "opskernel_manager/ops_kernel_builder_manager.h" | ||||
| #include "graph/utils/op_desc_utils.h" | |||||
| using domi::BuildMode; | using domi::BuildMode; | ||||
| @@ -311,6 +312,53 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt | |||||
| return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); | ||||
| } | } | ||||
| static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, | |||||
| const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) { | |||||
| GE_CHECK_NOTNULL(out_anchor); | |||||
| NodePtr in_node = out_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC); | |||||
| OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) | |||||
| .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) | |||||
| .Build(); | |||||
| (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); | |||||
| if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) { | |||||
| for (auto &node : graph->GetDirectNode()) { | |||||
| // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto op_type = op_desc->GetType(); | |||||
| if (op_type == NETOUTPUT) { | |||||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | |||||
| const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||||
| NodePtr in_node = peer_out_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| std::string in_node_op_type = in_node->GetType(); | |||||
| if (in_node_op_type == CONSTANT) { | |||||
| GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); | |||||
| std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; | |||||
| if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) { | |||||
| GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | ||||
| std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | ||||
| GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
| @@ -332,6 +380,9 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
| if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed."); | |||||
| if (sub_graph->GetGraphUnknownFlag()) { | if (sub_graph->GetGraphUnknownFlag()) { | ||||
| // unknown shape build flow | // unknown shape build flow | ||||
| GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | ||||
| @@ -274,13 +274,16 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da | |||||
| /// @param [in] stream stream to execute model on | /// @param [in] stream stream to execute model on | ||||
| /// @param [in] async_mode is asynchronize mode. | /// @param [in] async_mode is asynchronize mode. | ||||
| /// @param [in] input_data model input data | /// @param [in] input_data model input data | ||||
| /// @param [in] input_desc description of model input data | |||||
| /// @param [out] output_data model output data | /// @param [out] output_data model output data | ||||
| /// @param [out] output_desc description of model output data | |||||
| /// | /// | ||||
| Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | ||||
| OutputData &output_data) { | |||||
| const std::vector<GeTensorDesc> &input_desc, OutputData &output_data, | |||||
| std::vector<GeTensorDesc> &output_desc) { | |||||
| auto model_manager = ModelManager::GetInstance(); | auto model_manager = ModelManager::GetInstance(); | ||||
| GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
| Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data); | |||||
| Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Execute model failed, model_id:%u.", model_id); | GELOGE(ret, "Execute model failed, model_id:%u.", model_id); | ||||
| return ret; | return ret; | ||||
| @@ -65,7 +65,8 @@ class GraphLoader { | |||||
| const std::vector<uint32_t> &output_queue_ids); | const std::vector<uint32_t> &output_queue_ids); | ||||
| static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | ||||
| OutputData &output_data); | |||||
| const std::vector<GeTensorDesc> &input_desc, OutputData &output_data, | |||||
| std::vector<GeTensorDesc> &output_desc); | |||||
| static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); | static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); | ||||
| @@ -919,11 +919,11 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio | |||||
| ReplaceStringElem(op_name); | ReplaceStringElem(op_name); | ||||
| ReplaceStringElem(op_type); | ReplaceStringElem(op_type); | ||||
| string dump_file_path = | string dump_file_path = | ||||
| "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time); | |||||
| "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time); | |||||
| GELOGI("The exception dump file path is %s", dump_file_path.c_str()); | GELOGI("The exception dump file path is %s", dump_file_path.c_str()); | ||||
| uint64_t proto_size = dump_data.ByteSizeLong(); | uint64_t proto_size = dump_data.ByteSizeLong(); | ||||
| unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]); | |||||
| std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]); | |||||
| bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); | bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); | ||||
| if (!ret || proto_size == 0) { | if (!ret || proto_size == 0) { | ||||
| GELOGE(PARAM_INVALID, "Dump data proto serialize failed"); | GELOGE(PARAM_INVALID, "Dump data proto serialize failed"); | ||||
| @@ -117,7 +117,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener | |||||
| load_end_time_(0), | load_end_time_(0), | ||||
| time_info_(), | time_info_(), | ||||
| dataInputTid(0), | dataInputTid(0), | ||||
| is_model_has_inited_(false), | |||||
| is_weight_mem_has_inited_(false), | |||||
| is_feature_map_mem_has_inited_(false), | |||||
| model_id_(0), | model_id_(0), | ||||
| runtime_model_id_(0), | runtime_model_id_(0), | ||||
| version_(0), | version_(0), | ||||
| @@ -263,34 +264,65 @@ void DavinciModel::Shrink() { | |||||
| ge_model_.reset(); // delete object. | ge_model_.reset(); // delete object. | ||||
| } | } | ||||
| Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||||
| if (is_model_has_inited_) { | |||||
| GELOGE(FAILED, "call InitModelMem more than once ."); | |||||
| Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) { | |||||
| if (is_weight_mem_has_inited_) { | |||||
| GELOGE(FAILED, "call InitWeightMem more than once."); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| is_model_has_inited_ = true; | |||||
| is_weight_mem_has_inited_ = true; | |||||
| std::size_t data_size = TotalMemSize(); | |||||
| std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; | |||||
| const Buffer &weights = ge_model_->GetWeight(); | const Buffer &weights = ge_model_->GetWeight(); | ||||
| std::size_t weights_size = weights.GetSize(); | std::size_t weights_size = weights.GetSize(); | ||||
| GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE); | GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE); | ||||
| if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { | |||||
| GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); | |||||
| if ((weight_ptr != nullptr) && (weight_size < weights_size)) { | |||||
| GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if ((weight_ptr != nullptr) && (weight_size < weights_size)) { | |||||
| GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size); | |||||
| weights_mem_base_ = static_cast<uint8_t *>(dev_ptr); | |||||
| is_inner_weight_base_ = false; | |||||
| if (weights_size != 0) { | |||||
| weights_mem_base_ = static_cast<uint8_t *>(weight_ptr); | |||||
| is_inner_weight_base_ = false; | |||||
| if (weight_ptr == nullptr) { | |||||
| weights_mem_base_ = MallocWeightsMem(weights_size); | |||||
| if (weights_mem_base_ == nullptr) { | |||||
| GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||||
| return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; | |||||
| } | |||||
| is_inner_weight_base_ = true; | |||||
| } | |||||
| GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||||
| weights_mem_base_, weights_size); | |||||
| GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| GELOGI("copy weights data to device"); | |||||
| } | |||||
| runtime_param_.weight_base = weights_mem_base_; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
| if (is_feature_map_mem_has_inited_) { | |||||
| GELOGE(FAILED, "call InitFeatureMapMem more than once ."); | |||||
| return FAILED; | |||||
| } | |||||
| is_feature_map_mem_has_inited_ = true; | |||||
| std::size_t data_size = TotalMemSize(); | |||||
| std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; | |||||
| if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { | |||||
| GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| mem_base_ = static_cast<uint8_t *>(dev_ptr); | mem_base_ = static_cast<uint8_t *>(dev_ptr); | ||||
| p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr); | p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr); | ||||
| weights_mem_base_ = static_cast<uint8_t *>(dev_ptr); | |||||
| is_inner_mem_base_ = false; | is_inner_mem_base_ = false; | ||||
| is_inner_weight_base_ = false; | |||||
| if (TotalMemSize() && mem_base_ == nullptr) { | if (TotalMemSize() && mem_base_ == nullptr) { | ||||
| mem_base_ = MallocFeatureMapMem(data_size); | mem_base_ = MallocFeatureMapMem(data_size); | ||||
| @@ -298,12 +330,14 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p | |||||
| GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); | GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); | ||||
| return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; | return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; | ||||
| } | } | ||||
| GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||||
| GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||||
| mem_base_, data_size); | mem_base_, data_size); | ||||
| weights_mem_base_ = mem_base_; | |||||
| if (!is_inner_weight_base_) { | |||||
| weights_mem_base_ = mem_base_; | |||||
| is_inner_weight_base_ = true; | |||||
| } | |||||
| is_inner_mem_base_ = true; | is_inner_mem_base_ = true; | ||||
| is_inner_weight_base_ = true; | |||||
| } | } | ||||
| if (p2p_data_size != 0) { | if (p2p_data_size != 0) { | ||||
| @@ -312,27 +346,11 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p | |||||
| GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); | GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); | ||||
| return GE_EXEC_ALLOC_P2P_MEM_FAILED; | return GE_EXEC_ALLOC_P2P_MEM_FAILED; | ||||
| } | } | ||||
| GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||||
| GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||||
| p2p_mem_base_, p2p_data_size); | p2p_mem_base_, p2p_data_size); | ||||
| is_inner_p2p_mem_base_ = true; | is_inner_p2p_mem_base_ = true; | ||||
| } | } | ||||
| if (weights_size != 0) { | |||||
| weights_mem_base_ = static_cast<uint8_t *>(weight_ptr); | |||||
| is_inner_weight_base_ = false; | |||||
| if (weight_ptr == nullptr) { | |||||
| weights_mem_base_ = MallocWeightsMem(weights_size); | |||||
| if (weights_mem_base_ == nullptr) { | |||||
| GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||||
| return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; | |||||
| } | |||||
| is_inner_weight_base_ = true; | |||||
| } | |||||
| GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||||
| weights_mem_base_, weights_size); | |||||
| GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||||
| } | |||||
| GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | ||||
| runtime_param_.mem_base = mem_base_; | runtime_param_.mem_base = mem_base_; | ||||
| runtime_param_.weight_base = weights_mem_base_; | runtime_param_.weight_base = weights_mem_base_; | ||||
| @@ -642,8 +660,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| GE_TIMESTAMP_START(InitModelMem); | GE_TIMESTAMP_START(InitModelMem); | ||||
| GELOGD("Known node is %d", known_node_); | GELOGD("Known node is %d", known_node_); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size)); | |||||
| if (!known_node_) { | if (!known_node_) { | ||||
| GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); | |||||
| GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); | |||||
| data_inputer_ = new (std::nothrow) DataInputer(); | data_inputer_ = new (std::nothrow) DataInputer(); | ||||
| GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); | GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); | ||||
| } | } | ||||
| @@ -1140,6 +1159,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
| GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, | GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, | ||||
| GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); | GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -2780,7 +2800,7 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
| reinterpret_cast<int64_t *>(shape_data_buffer_data) + | reinterpret_cast<int64_t *>(shape_data_buffer_data) + | ||||
| shape_data_buffer_length / sizeof(int64_t)); | shape_data_buffer_length / sizeof(int64_t)); | ||||
| GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | ||||
| delete[] (int64_t *)current_data.blobs.back().data; | |||||
| delete[] reinterpret_cast<int64_t *>(current_data.blobs.back().data); | |||||
| current_data.blobs.pop_back(); | current_data.blobs.pop_back(); | ||||
| } | } | ||||
| GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | ||||
| @@ -584,7 +584,8 @@ class DavinciModel { | |||||
| Status SyncVarData(); | Status SyncVarData(); | ||||
| Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); | |||||
| Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size); | |||||
| Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size); | |||||
| void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | ||||
| @@ -850,7 +851,9 @@ class DavinciModel { | |||||
| Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); | Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); | ||||
| Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); | Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); | ||||
| bool is_model_has_inited_; | |||||
| bool is_weight_mem_has_inited_; | |||||
| bool is_feature_map_mem_has_inited_; | |||||
| uint32_t model_id_; | uint32_t model_id_; | ||||
| uint32_t runtime_model_id_; | uint32_t runtime_model_id_; | ||||
| string name_; | string name_; | ||||
| @@ -31,6 +31,7 @@ | |||||
| #include "model/ge_root_model.h" | #include "model/ge_root_model.h" | ||||
| #include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
| #include "common/formats/utils/formats_trans_utils.h" | #include "common/formats/utils/formats_trans_utils.h" | ||||
| #include "hybrid/hybrid_davinci_model.h" | |||||
| namespace ge { | namespace ge { | ||||
| thread_local uint32_t device_count = 0; | thread_local uint32_t device_count = 0; | ||||
| @@ -204,6 +205,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||||
| ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | ||||
| std::lock_guard<std::mutex> lock(map_mutex_); | std::lock_guard<std::mutex> lock(map_mutex_); | ||||
| auto hybrid_davinci_model = hybrid_model_map_.find(model_id); | |||||
| if (hybrid_davinci_model != hybrid_model_map_.end()) { | |||||
| uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); | |||||
| DestroyAicpuSession(session_id); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto it = model_map_.find(model_id); | auto it = model_map_.find(model_id); | ||||
| if (it == model_map_.end()) { | if (it == model_map_.end()) { | ||||
| GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | ||||
| @@ -216,7 +224,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||||
| ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { | ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { | ||||
| GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); | GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); | ||||
| std::lock_guard<std::mutex> lock(sess_ids_mutex_); | |||||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||||
| std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | ||||
| if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | ||||
| Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); | Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); | ||||
| @@ -229,7 +237,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ | |||||
| } | } | ||||
| ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { | ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { | ||||
| std::lock_guard<std::mutex> lock(sess_ids_mutex_); | |||||
| std::lock_guard<std::mutex> lock(map_mutex_); | |||||
| std::vector<uint64_t> v_aicpu_kernel; | std::vector<uint64_t> v_aicpu_kernel; | ||||
| std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | ||||
| if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | ||||
| @@ -925,6 +933,12 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||||
| vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
| std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats, | std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats, | ||||
| bool new_model_desc) { | bool new_model_desc) { | ||||
| std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id); | |||||
| if (hybrid_davinci_model != nullptr) { | |||||
| hybrid_davinci_model->SetModelDescVersion(new_model_desc); | |||||
| return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); | |||||
| } | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | ||||
| "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | ||||
| @@ -943,6 +957,11 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||||
| /// | /// | ||||
| Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | ||||
| int32_t &dynamic_type) { | int32_t &dynamic_type) { | ||||
| std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id); | |||||
| if (hybrid_davinci_model != nullptr) { | |||||
| return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); | |||||
| } | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
| "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); | "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); | ||||
| @@ -975,6 +994,12 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect | |||||
| /// | /// | ||||
| Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, | Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, | ||||
| std::vector<std::string> &user_input_shape_order) { | std::vector<std::string> &user_input_shape_order) { | ||||
| auto hybrid_davinci_model = GetHybridModel(model_id); | |||||
| if (hybrid_davinci_model != nullptr) { | |||||
| hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto davinci_model = GetModel(model_id); | auto davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
| "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) | "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) | ||||
| @@ -990,6 +1015,12 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> & | |||||
| } | } | ||||
| Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) { | Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) { | ||||
| std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id); | |||||
| if (hybrid_davinci_model != nullptr) { | |||||
| hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info); | |||||
| return SUCCESS; | |||||
| } | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
| davinci_model->GetModelAttr(dynamic_output_shape_info); | davinci_model->GetModelAttr(dynamic_output_shape_info); | ||||
| @@ -1201,10 +1232,25 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||||
| /// @param [in] stream model stream | /// @param [in] stream model stream | ||||
| /// @param [in] async_mode is asynchronize mode. | /// @param [in] async_mode is asynchronize mode. | ||||
| /// @param [in] input_data input data | /// @param [in] input_data input data | ||||
| /// @param [in] input_desc description of input data | |||||
| /// @param [out] output_data output data | /// @param [out] output_data output data | ||||
| /// @param [out] output_desc description of output data | |||||
| /// | /// | ||||
| Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | ||||
| OutputData &output_data) { | |||||
| const std::vector<GeTensorDesc> &input_desc, OutputData &output_data, | |||||
| std::vector<GeTensorDesc> &output_desc) { | |||||
| std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id); | |||||
| if (hybrid_davinci_model != nullptr) { | |||||
| auto inputs = input_data.blobs; | |||||
| auto outputs = output_data.blobs; | |||||
| Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream); | |||||
| if (status == SUCCESS) { | |||||
| GELOGI("Execute model %u success.", model_id); | |||||
| } | |||||
| return status; | |||||
| } | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); | ||||
| @@ -1243,8 +1289,8 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) { | |||||
| GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); | |||||
| Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) { | |||||
| GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); | |||||
| std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | ||||
| CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); | CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); | ||||
| if (aicpu_kernel == nullptr) { | if (aicpu_kernel == nullptr) { | ||||
| @@ -1267,18 +1313,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ | |||||
| std::map<string, CustAICPUKernelPtr> new_so_name; | std::map<string, CustAICPUKernelPtr> new_so_name; | ||||
| new_so_name.insert({so_name, aicpu_kernel}); | new_so_name.insert({so_name, aicpu_kernel}); | ||||
| cust_aicpu_so_[resource_id] = new_so_name; | cust_aicpu_so_[resource_id] = new_so_name; | ||||
| GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id); | |||||
| loaded = false; | |||||
| GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| auto it_so_name = it->second.find(so_name); | auto it_so_name = it->second.find(so_name); | ||||
| if (it_so_name == it->second.end()) { | if (it_so_name == it->second.end()) { | ||||
| it->second.insert({so_name, aicpu_kernel}); | it->second.insert({so_name, aicpu_kernel}); | ||||
| GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id); | |||||
| loaded = false; | |||||
| GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); | |||||
| return SUCCESS; | |||||
| } | } | ||||
| loaded = true; | |||||
| GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | ||||
| GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str()); | |||||
| std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | ||||
| if (cust_aicpu_so_.size() == 0) return SUCCESS; | if (cust_aicpu_so_.size() == 0) return SUCCESS; | ||||
| // get current context | // get current context | ||||
| @@ -148,10 +148,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| /// @param [in] stream model stream | /// @param [in] stream model stream | ||||
| /// @param [in] async_mode is asynchronize mode. | /// @param [in] async_mode is asynchronize mode. | ||||
| /// @param [in] input_data model input data | /// @param [in] input_data model input data | ||||
| /// @param [in] input_desc description of model input data | |||||
| /// @param [out] output_data model output data | /// @param [out] output_data model output data | ||||
| /// @param [out] output_desc description of model output data | |||||
| /// | /// | ||||
| ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | ||||
| OutputData &output_data); | |||||
| const std::vector<GeTensorDesc> &input_desc, OutputData &output_data, | |||||
| std::vector<GeTensorDesc> &output_desc); | |||||
| ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs); | ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs); | ||||
| @@ -286,7 +289,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ||||
| ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name); | |||||
| ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded); | |||||
| ge::Status LaunchCustAicpuSo(); | ge::Status LaunchCustAicpuSo(); | ||||
| @@ -875,7 +875,9 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| } | } | ||||
| if (kernel_type_ == ccKernelType::CUST_AI_CPU) { | if (kernel_type_ == ccKernelType::CUST_AI_CPU) { | ||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); | |||||
| bool loaded = false; | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), | |||||
| "launch cust aicpu so failed"); | |||||
| } | } | ||||
| // copy args to new host memory | // copy args to new host memory | ||||
| @@ -41,7 +41,7 @@ class StreamSwitchTaskInfo : public TaskInfo { | |||||
| Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | ||||
| private: | private: | ||||
| void SetInputAndValuePtr(DavinciModel *davinci_model, const vector<void *> &input_data_addrs); | |||||
| void SetInputAndValuePtr(DavinciModel *davinci_model, const std::vector<void *> &input_data_addrs); | |||||
| void *input_ptr_; | void *input_ptr_; | ||||
| rtCondition_t cond_; | rtCondition_t cond_; | ||||
| void *value_ptr_; | void *value_ptr_; | ||||
| @@ -49,7 +49,7 @@ class StreamSwitchTaskInfo : public TaskInfo { | |||||
| uint32_t true_stream_id_; | uint32_t true_stream_id_; | ||||
| rtSwitchDataType_t data_type_; | rtSwitchDataType_t data_type_; | ||||
| static const uint32_t kInputNum = 2; | static const uint32_t kInputNum = 2; | ||||
| vector<int64_t> fixed_addr_offset_; | |||||
| std::vector<int64_t> fixed_addr_offset_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ | ||||
| @@ -25,10 +25,11 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { | |||||
| const void *args[] = {this->GetNavTablePtr(), | const void *args[] = {this->GetNavTablePtr(), | ||||
| reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))}; | reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))}; | ||||
| rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return | |||||
| RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
| rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
| rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), (void *)args, sizeof(args), | |||||
| RT_MEMCPY_HOST_TO_DEVICE); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
| rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | ||||
| @@ -87,7 +87,7 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||||
| } | } | ||||
| GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); | GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); | ||||
| const size_t nav_table_len = 2 * stub_func_list.size(); | const size_t nav_table_len = 2 * stub_func_list.size(); | ||||
| std::unique_ptr<uint64_t[]> nav_table(new(std::nothrow) uint64_t[nav_table_len]); | |||||
| std::unique_ptr<uint64_t[]> nav_table(new (std::nothrow) uint64_t[nav_table_len]); | |||||
| GE_CHECK_NOTNULL(nav_table); | GE_CHECK_NOTNULL(nav_table); | ||||
| uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); | uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); | ||||
| @@ -106,16 +106,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||||
| nav_table[i * 2 + 1] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | nav_table[i * 2 + 1] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | ||||
| GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | ||||
| } | } | ||||
| rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); | |||||
| rt_ret = rtMalloc(reinterpret_cast<void **>(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
| rt_ret = | |||||
| rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| rt_ret = rtMemcpy(reinterpret_cast<void *>(hbm_nav_table_addr), nav_table_size, | |||||
| reinterpret_cast<void *>(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); | ||||
| GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) | GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
| // Create the necessary metadata for the super kernel | // Create the necessary metadata for the super kernel | ||||
| h = std::unique_ptr<skt::SuperKernel>( | |||||
| new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); | |||||
| h = | |||||
| std::unique_ptr<skt::SuperKernel>(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace skt | } // namespace skt | ||||
| @@ -63,8 +63,8 @@ struct RuntimeParam { | |||||
| }; | }; | ||||
| typedef struct FusionOpInfo { | typedef struct FusionOpInfo { | ||||
| vector<string> original_op_names; | |||||
| string op_name; | |||||
| std::vector<std::string> original_op_names; | |||||
| std::string op_name; | |||||
| uint32_t op_index; | uint32_t op_index; | ||||
| uint32_t stream_id; | uint32_t stream_id; | ||||
| } FusionOpInfo; | } FusionOpInfo; | ||||
| @@ -131,7 +131,7 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma | |||||
| auto dst_addr = static_cast<uint8_t *>(buffer_addr); | auto dst_addr = static_cast<uint8_t *>(buffer_addr); | ||||
| GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", | GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", | ||||
| name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); | name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); | ||||
| *(uintptr_t *)(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr); | |||||
| *reinterpret_cast<uintptr_t *>(args_info + offset)= reinterpret_cast<uintptr_t>(dst_addr); | |||||
| is_updated_ = true; | is_updated_ = true; | ||||
| } | } | ||||
| } | } | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/debug/log.h" | |||||
| #include "framework/common/types.h" | #include "framework/common/types.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| @@ -72,7 +73,7 @@ Status DynamicShapePartitioner::Partition() { | |||||
| } | } | ||||
| REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true), | REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true), | ||||
| "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str()); | "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str()); | ||||
| REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed do ctrl edge transfer!"); | |||||
| DumpGraph("_Before_DSP"); | DumpGraph("_Before_DSP"); | ||||
| auto status = PartitionImpl(); | auto status = PartitionImpl(); | ||||
| GELOGD("%s.", DebugString().c_str()); | GELOGD("%s.", DebugString().c_str()); | ||||
| @@ -86,6 +87,50 @@ Status DynamicShapePartitioner::Partition() { | |||||
| return status; | return status; | ||||
| } | } | ||||
| Status DynamicShapePartitioner::CtrlEdgeTransfer() { | |||||
| GELOGD("Do ctrl edge transfer start!"); | |||||
| GE_CHECK_NOTNULL(root_graph_); | |||||
| bool is_dynamic_shape = false; | |||||
| (void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | |||||
| if (!is_dynamic_shape) { | |||||
| return SUCCESS; | |||||
| } | |||||
| for (auto &subgraph : root_graph_->GetAllSubgraphs()) { | |||||
| for (ge::NodePtr &n : subgraph->GetDirectNode()) { | |||||
| auto op_desc = n->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto op_type = op_desc->GetType(); | |||||
| if (op_type == CONSTANT || op_type == CONSTANTOP) { | |||||
| if (n->GetInAllNodes().empty()) { | |||||
| GELOGD("[CtrlEdgeTransferPass] node [%s] in nodes is empty", n->GetName().c_str()); | |||||
| continue; | |||||
| } | |||||
| GELOGD("start to tranfer ctrl edge for const node [%s]", n->GetName().c_str()); | |||||
| for (auto &in_control_node : n->GetInControlNodes()) { | |||||
| GE_CHECK_NOTNULL(in_control_node); | |||||
| GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), | |||||
| n->GetInControlAnchor()), "remove edge failed"); | |||||
| for (auto &out_node : n->GetOutNodes()) { | |||||
| if (out_node == nullptr) { | |||||
| continue; | |||||
| } | |||||
| GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), | |||||
| out_node->GetInControlAnchor()), "add edge failed."); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| GELOGD("Do ctrl edge transfer end!"); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status DynamicShapePartitioner::PartitionImpl() { | Status DynamicShapePartitioner::PartitionImpl() { | ||||
| REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed."); | REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed."); | ||||
| REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes."); | REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes."); | ||||
| @@ -151,6 +151,7 @@ class DynamicShapePartitioner { | |||||
| Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); | Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); | ||||
| Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow); | Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow); | ||||
| bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor); | bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor); | ||||
| Status CtrlEdgeTransfer(); | |||||
| ge::ComputeGraphPtr root_graph_; // The original graph to partition | ge::ComputeGraphPtr root_graph_; // The original graph to partition | ||||
| std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_; // Record nodes and the cluster it belongs to | std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_; // Record nodes and the cluster it belongs to | ||||
| // topological sorted clusters, this field will change with the splitting. | // topological sorted clusters, this field will change with the splitting. | ||||
| @@ -37,10 +37,6 @@ | |||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| namespace ge { | namespace ge { | ||||
| namespace { | |||||
| const uint32_t kShapeDimSize = 1; | |||||
| const uint32_t DIM_SIZE_TWO = 2; | |||||
| } // namespace | |||||
| Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data, | Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data, | ||||
| std::vector<GeTensorPtr> &v_output, const bool scalar_output) { | std::vector<GeTensorPtr> &v_output, const bool scalar_output) { | ||||
| @@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No | |||||
| GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return ""); | GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return ""); | ||||
| if (node->GetType() == CAST) { | if (node->GetType() == CAST) { | ||||
| trans_data_type = true; | trans_data_type = true; | ||||
| } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) { | |||||
| } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) { | |||||
| trans_format = true; | trans_format = true; | ||||
| trans_shape = true; | trans_shape = true; | ||||
| } else if (node->GetType() == TRANSDATA) { | } else if (node->GetType() == TRANSDATA) { | ||||
| @@ -8,7 +8,7 @@ set(SRC_LIST | |||||
| "engine/host_cpu_engine.cc" | "engine/host_cpu_engine.cc" | ||||
| "ops_kernel_store/host_cpu_ops_kernel_info.cc" | "ops_kernel_store/host_cpu_ops_kernel_info.cc" | ||||
| "ops_kernel_store/op/op_factory.cc" | "ops_kernel_store/op/op_factory.cc" | ||||
| "ops_kernel_store/op/host_op.cc" | |||||
| "ops_kernel_store/op/host_op.cc" | |||||
| ) | ) | ||||
| set(CPU_OPS_KERNEL_LIST | set(CPU_OPS_KERNEL_LIST | ||||
| @@ -98,7 +98,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE | |||||
| set_target_properties(atc_host_cpu_engine PROPERTIES | set_target_properties(atc_host_cpu_engine PROPERTIES | ||||
| OUTPUT_NAME host_cpu_engine | OUTPUT_NAME host_cpu_engine | ||||
| LIBRARY_OUTPUT_DIRECTORY atclib | |||||
| LIBRARY_OUTPUT_DIRECTORY atclib | |||||
| ) | ) | ||||
| ############ libhost_cpu_opskernel_builder.so ############ | ############ libhost_cpu_opskernel_builder.so ############ | ||||
| @@ -185,7 +185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES | |||||
| ) | ) | ||||
| ############ libhost_cpu_opskernel_builder.a ############ | ############ libhost_cpu_opskernel_builder.a ############ | ||||
| add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST}) | |||||
| add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) | |||||
| target_compile_options(host_cpu_opskernel_builder_static PRIVATE | target_compile_options(host_cpu_opskernel_builder_static PRIVATE | ||||
| -Werror | -Werror | ||||
| @@ -112,8 +112,8 @@ void FloorDivKernel::ShapeCal(const std::vector<ge::ConstGeTensorPtr> &input, Ge | |||||
| template <typename T> | template <typename T> | ||||
| T FloorDivKernel::DivCal(const T &x_i, const T &y_i) { | T FloorDivKernel::DivCal(const T &x_i, const T &y_i) { | ||||
| if ((x_i < static_cast<T>(0)) != (y_i < static_cast<T>(0))) { | if ((x_i < static_cast<T>(0)) != (y_i < static_cast<T>(0))) { | ||||
| T abs_x_i = std::abs(x_i); | |||||
| T abs_y_i = std::abs(y_i); | |||||
| T abs_x_i = x_i < 0 ? -x_i : x_i; | |||||
| T abs_y_i = y_i < 0 ? -y_i : y_i; | |||||
| return static_cast<T>(static_cast<int32_t>(-(abs_x_i + abs_y_i - 1) / abs_y_i)); | return static_cast<T>(static_cast<int32_t>(-(abs_x_i + abs_y_i - 1) / abs_y_i)); | ||||
| } else { | } else { | ||||
| return static_cast<T>(static_cast<int32_t>(x_i / y_i)); | return static_cast<T>(static_cast<int32_t>(x_i / y_i)); | ||||
| @@ -40,10 +40,6 @@ class FloorDivKernel : public Kernel { | |||||
| template <typename T> | template <typename T> | ||||
| Status DataCal(const std::vector<ConstGeTensorPtr> &input, ge::GeTensorPtr output_ptr); | Status DataCal(const std::vector<ConstGeTensorPtr> &input, ge::GeTensorPtr output_ptr); | ||||
| Status ComputeByDataType(DataType data_type, const std::vector<ConstGeTensorPtr> &input, GeTensorPtr output_ptr); | Status ComputeByDataType(DataType data_type, const std::vector<ConstGeTensorPtr> &input, GeTensorPtr output_ptr); | ||||
| int64_t axis_dim_; | |||||
| int64_t head_dim_; | |||||
| int64_t end_dim_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -187,7 +187,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| uint tmp_value = aspect_ratios_size * min_sizes_size; | |||||
| uint32_t tmp_value = aspect_ratios_size * min_sizes_size; | |||||
| if (ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) { | if (ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) { | ||||
| GELOGW("Failed to get list param."); | GELOGW("Failed to get list param."); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| @@ -199,7 +199,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| num_priors = static_cast<int>(tmp_value); | num_priors = static_cast<int>(tmp_value); | ||||
| if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) { | if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) { | ||||
| GELOGW("Failed to get list param."); | GELOGW("Failed to get list param."); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| @@ -288,7 +288,7 @@ std::unique_ptr<float[]> SsdPriorboxKernel::BoundaryCalulate(int dim_size, int l | |||||
| } | } | ||||
| } | } | ||||
| return std::move(output_data); | |||||
| return output_data; | |||||
| } | } | ||||
| Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector<GeTensorPtr> &v_output) { | Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector<GeTensorPtr> &v_output) { | ||||
| @@ -77,7 +77,7 @@ do { \ | |||||
| RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) | RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) | ||||
| #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) \ | #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) \ | ||||
| RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) | |||||
| RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACKS, fmt, "Callback", name, ##__VA_ARGS__) | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ | #endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ | ||||
| @@ -353,6 +353,44 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<GeTensorDesc> &input_desc, | |||||
| std::vector<DataBuffer> &outputs, | |||||
| std::vector<GeTensorDesc> &output_desc) { | |||||
| GELOGI("Start to execute model."); | |||||
| HybridModelExecutor::ExecuteArgs args; | |||||
| args.inputs.resize(inputs.size()); | |||||
| for (size_t i = 0; i < inputs.size(); ++i) { | |||||
| TensorValue tensor_value(inputs[i].data, inputs[i].length); | |||||
| args.inputs[i] = tensor_value; | |||||
| } | |||||
| GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); | |||||
| for (const auto &output_tensor_desc : args.output_desc) { | |||||
| output_desc.emplace_back(*output_tensor_desc); | |||||
| } | |||||
| for (size_t i = 0; i < args.outputs.size(); ++i) { | |||||
| int64_t output_real_size = 0; | |||||
| ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size); | |||||
| if (graph_status != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Get tensor size in bytes failed."); | |||||
| return FAILED; | |||||
| } | |||||
| if (output_real_size > 0) { | |||||
| if (outputs[i].length < static_cast<uint64_t>(output_real_size)) { | |||||
| GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]", | |||||
| i, outputs[i].length, output_real_size); | |||||
| return FAILED; | |||||
| } | |||||
| GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); | |||||
| } | |||||
| outputs[i].length = output_real_size; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | ||||
| GELOGD("Start to execute model."); | GELOGD("Start to execute model."); | ||||
| // prepare inputs | // prepare inputs | ||||
| @@ -35,6 +35,11 @@ class HybridModelAsyncExecutor { | |||||
| Status Init(); | Status Init(); | ||||
| Status Execute(const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<GeTensorDesc> &input_desc, | |||||
| std::vector<DataBuffer> &outputs, | |||||
| std::vector<GeTensorDesc> &output_desc); | |||||
| Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | ||||
| Status Start(const std::shared_ptr<ModelListener> &listener); | Status Start(const std::shared_ptr<ModelListener> &listener); | ||||
| @@ -82,7 +82,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
| Status HybridModelExecutor::Cleanup() { | Status HybridModelExecutor::Cleanup() { | ||||
| GELOGD("Start to cleanup."); | GELOGD("Start to cleanup."); | ||||
| context_.callback_manager->Destroy(); | context_.callback_manager->Destroy(); | ||||
| RuntimeInferenceContext::DestroyContext(to_string(context_.session_id)); | |||||
| RuntimeInferenceContext::DestroyContext(std::to_string(context_.session_id)); | |||||
| GELOGD("Cleanup successfully."); | GELOGD("Cleanup successfully."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -33,7 +33,7 @@ class HybridProfiler { | |||||
| SHAPE_INFERENCE, | SHAPE_INFERENCE, | ||||
| COMPILE, | COMPILE, | ||||
| EXECUTION, | EXECUTION, | ||||
| CALLBACK | |||||
| CALLBACKS | |||||
| }; | }; | ||||
| struct Event { | struct Event { | ||||
| @@ -27,7 +27,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| class NodeTask; | class NodeTask; | ||||
| class GraphExecutionContext; | |||||
| struct GraphExecutionContext; | |||||
| class SubgraphContext; | class SubgraphContext; | ||||
| class ShapeFuture { | class ShapeFuture { | ||||
| @@ -38,6 +38,14 @@ class HybridDavinciModel::Impl { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status Execute(const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<GeTensorDesc> &input_desc, | |||||
| std::vector<DataBuffer> &outputs, | |||||
| std::vector<GeTensorDesc> &output_desc, | |||||
| rtStream_t stream) { | |||||
| return executor_.Execute(inputs, input_desc, outputs, output_desc); | |||||
| } | |||||
| Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | ||||
| return executor_.Execute(inputs, outputs); | return executor_.Execute(inputs, outputs); | ||||
| } | } | ||||
| @@ -68,6 +76,33 @@ class HybridDavinciModel::Impl { | |||||
| executor_.SetDeviceId(device_id); | executor_.SetDeviceId(device_id); | ||||
| } | } | ||||
| uint64_t GetSessionId() { | |||||
| return model_.GetSessionId(); | |||||
| } | |||||
| Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) { | |||||
| return model_.GetDynamicBatchInfo(batch_info, dynamic_type); | |||||
| } | |||||
| void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) { | |||||
| model_.GetUserDesignateShapeOrder(user_input_shape_order); | |||||
| } | |||||
| void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) { | |||||
| model_.GetModelAttr(dynamic_output_shape_info); | |||||
| } | |||||
| Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats) { | |||||
| return model_.GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); | |||||
| } | |||||
| void SetModelDescVersion(bool is_new_model_desc) { | |||||
| model_.SetModelDescVersion(is_new_model_desc); | |||||
| } | |||||
| private: | private: | ||||
| std::shared_ptr<ModelListener> listener_; | std::shared_ptr<ModelListener> listener_; | ||||
| HybridModel model_; | HybridModel model_; | ||||
| @@ -78,8 +113,8 @@ HybridDavinciModel::~HybridDavinciModel() { | |||||
| delete impl_; | delete impl_; | ||||
| } | } | ||||
| unique_ptr<HybridDavinciModel> HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { | |||||
| auto instance = unique_ptr<HybridDavinciModel>(new (std::nothrow)HybridDavinciModel()); | |||||
| std::unique_ptr<HybridDavinciModel> HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { | |||||
| auto instance = std::unique_ptr<HybridDavinciModel>(new (std::nothrow)HybridDavinciModel()); | |||||
| if (instance != nullptr) { | if (instance != nullptr) { | ||||
| instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model); | instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model); | ||||
| if (instance->impl_ != nullptr) { | if (instance->impl_ != nullptr) { | ||||
| @@ -95,6 +130,14 @@ Status HybridDavinciModel::Init() { | |||||
| return impl_->Init(); | return impl_->Init(); | ||||
| } | } | ||||
| Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<GeTensorDesc> &input_desc, | |||||
| std::vector<DataBuffer> &outputs, | |||||
| std::vector<GeTensorDesc> &output_desc, rtStream_t stream) { | |||||
| GE_CHECK_NOTNULL(impl_); | |||||
| return impl_->Execute(inputs, input_desc, outputs, output_desc, stream); | |||||
| } | |||||
| Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | ||||
| GE_CHECK_NOTNULL(impl_); | GE_CHECK_NOTNULL(impl_); | ||||
| return impl_->Execute(inputs, outputs); | return impl_->Execute(inputs, outputs); | ||||
| @@ -132,5 +175,41 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) { | |||||
| impl_->SetDeviceId(device_id); | impl_->SetDeviceId(device_id); | ||||
| } | } | ||||
| } | } | ||||
| Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) { | |||||
| GE_CHECK_NOTNULL(impl_); | |||||
| return impl_->GetDynamicBatchInfo(batch_info, dynamic_type); | |||||
| } | |||||
| void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) { | |||||
| if (impl_ != nullptr) { | |||||
| impl_->GetUserDesignateShapeOrder(user_input_shape_order); | |||||
| } | |||||
| } | |||||
| void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) { | |||||
| if (impl_ != nullptr) { | |||||
| impl_->GetModelAttr(dynamic_output_shape_info); | |||||
| } | |||||
| } | |||||
| Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats) { | |||||
| GE_CHECK_NOTNULL(impl_); | |||||
| return impl_->GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats); | |||||
| } | |||||
| void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { | |||||
| if (impl_ != nullptr) { | |||||
| impl_->SetModelDescVersion(is_new_model_desc); | |||||
| } | |||||
| } | |||||
| uint64_t HybridDavinciModel::GetSessionId() { | |||||
| GE_CHECK_NOTNULL(impl_); | |||||
| return impl_->GetSessionId(); | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -37,6 +37,12 @@ class HybridDavinciModel { | |||||
| Status Init(); | Status Init(); | ||||
| Status Execute(const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<GeTensorDesc> &input_desc, | |||||
| std::vector<DataBuffer> &outputs, | |||||
| std::vector<GeTensorDesc> &output_desc, | |||||
| rtStream_t stream); | |||||
| Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | ||||
| Status ModelRunStart(); | Status ModelRunStart(); | ||||
| @@ -51,6 +57,21 @@ class HybridDavinciModel { | |||||
| void SetDeviceId(uint32_t device_id); | void SetDeviceId(uint32_t device_id); | ||||
| uint64_t GetSessionId(); | |||||
| Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type); | |||||
| void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order); | |||||
| void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||||
| Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats); | |||||
| void SetModelDescVersion(bool is_new_model_desc); | |||||
| private: | private: | ||||
| HybridDavinciModel() = default; | HybridDavinciModel() = default; | ||||
| class Impl; | class Impl; | ||||
| @@ -28,6 +28,14 @@ Status HybridDavinciModel::Init() { | |||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs, | |||||
| const std::vector<GeTensorDesc> &input_desc, | |||||
| std::vector<DataBuffer> &outputs, | |||||
| std::vector<GeTensorDesc> &output_desc, | |||||
| rtStream_t stream) { | |||||
| return UNSUPPORTED; | |||||
| } | |||||
| Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) { | ||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| @@ -52,5 +60,29 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) { | |||||
| void HybridDavinciModel::SetDeviceId(uint32_t device_id) { | void HybridDavinciModel::SetDeviceId(uint32_t device_id) { | ||||
| } | } | ||||
| uint64_t HybridDavinciModel::GetSessionId() { | |||||
| return 0; | |||||
| } | |||||
| Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) { | |||||
| return UNSUPPORTED; | |||||
| } | |||||
| void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) { | |||||
| } | |||||
| void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) { | |||||
| } | |||||
| Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats) { | |||||
| return UNSUPPORTED; | |||||
| } | |||||
| void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) { | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -21,12 +21,18 @@ | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
| #include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
| #include "graph/utils/type_utils.h" | |||||
| #include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
| #include "hybrid/model/hybrid_model_builder.h" | #include "hybrid/model/hybrid_model_builder.h" | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "common/op/ge_op_utils.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| namespace { | |||||
| const int64_t kMemSizeUnknownShape = -1; // Unknown shape mem size | |||||
| } | |||||
| HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) { | HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) { | ||||
| } | } | ||||
| @@ -128,7 +134,187 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c | |||||
| } | } | ||||
| const string &HybridModel::GetModelName() const { | const string &HybridModel::GetModelName() const { | ||||
| return model_name_; | |||||
| return model_name_; | |||||
| } | |||||
| Status HybridModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) { | |||||
| // dynamic shape do not need dynamic batch | |||||
| batch_info = {}; | |||||
| dynamic_type = -1; | |||||
| return SUCCESS; | |||||
| } | |||||
| void HybridModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) { | |||||
| // dynamic shape do not need dynamic batch | |||||
| user_input_shape_order = {}; | |||||
| } | |||||
| void HybridModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) { | |||||
| dynamic_output_shape_info = {}; | |||||
| } | |||||
| Status HybridModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &output_formats) { | |||||
| auto node_item_list = root_graph_item_->GetInputNodes(); | |||||
| if (node_item_list.empty()) { | |||||
| GELOGE(FAILED, "node item list is empty!"); | |||||
| return FAILED; | |||||
| } | |||||
| GE_CHECK_NOTNULL(node_item_list[0]->node); | |||||
| GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc()); | |||||
| if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) { | |||||
| GELOGE(FAILED, "input size of op is not 1!"); | |||||
| return FAILED; | |||||
| } | |||||
| GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | |||||
| GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); | |||||
| return SUCCESS; | |||||
| } | |||||
| void HybridModel::SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims, std::vector<std::pair<int64_t,int64_t>> &shape_ranges, | |||||
| InputOutputDescInfo &input) { | |||||
| for (auto model_input_dim : model_input_dims) { | |||||
| input.shape_info.dims.push_back(model_input_dim); | |||||
| } | |||||
| input.shape_info.shape_ranges = shape_ranges; | |||||
| return; | |||||
| } | |||||
| void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input) { | |||||
| std::vector<std::pair<int64_t,int64_t>> shape_ranges; | |||||
| if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { | |||||
| // When static aipp is set, need to get the model input dims which processed by aipp | |||||
| vector<int64_t> model_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); | |||||
| SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, input); | |||||
| return; | |||||
| } | |||||
| // judge if this data is linked dynamic aipp first, multiply batch has been considered | |||||
| if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { | |||||
| vector<int64_t> dynamic_aipp_input_dims; | |||||
| (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); | |||||
| SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, input); | |||||
| return; | |||||
| } else { | |||||
| vector<int64_t> input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims(); | |||||
| op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges); | |||||
| SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, input); | |||||
| return; | |||||
| } | |||||
| } | |||||
| Status HybridModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) { | |||||
| auto node_item_list = root_graph_item_->GetInputNodes(); | |||||
| for (auto &node_item : node_item_list) { | |||||
| InputOutputDescInfo input; | |||||
| GE_CHECK_NOTNULL(node_item->node); | |||||
| auto op_desc = node_item->node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||||
| Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||||
| input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
| input.name = op_desc->GetName(); | |||||
| int64_t input_size = 0; | |||||
| GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||||
| // support dynamic shape | |||||
| if (input_size < 0) { | |||||
| GELOGD("dynamic shape scene, input size is unknown. " | |||||
| "format=%d, data_type=%d, input_size=%ld", | |||||
| format, input.data_type, input_size); | |||||
| input_size = kMemSizeUnknownShape; // -1 | |||||
| } | |||||
| // not support dynamic shape input for now, so input_size here will be not less than zero. | |||||
| input.size = input_size; | |||||
| CreateInputDimsInfo(op_desc, input); | |||||
| formats.push_back(format); | |||||
| input_desc.push_back(input); | |||||
| } | |||||
| is_new_model_desc_ = false; | |||||
| return SUCCESS; | |||||
| } | |||||
| void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) { | |||||
| GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return ); | |||||
| Format format = output_desc->GetFormat(); | |||||
| GeShape shape = output_desc->GetShape(); | |||||
| std::vector<std::pair<int64_t,int64_t>> shape_ranges; | |||||
| output_desc->GetShapeRange(shape_ranges); | |||||
| DataType data_type = output_desc->GetDataType(); | |||||
| format_result = format; | |||||
| if (format == FORMAT_FRACTAL_Z) { // FraczToHWCK | |||||
| int64_t k = shape.GetDim(0); // 0: first dim | |||||
| int64_t c = shape.GetDim(1); // 1: second dim | |||||
| int64_t h = shape.GetDim(2); // 2: third dim | |||||
| int64_t w = shape.GetDim(3); // 3: forth dim | |||||
| output_desc_info.shape_info.dims.push_back(h); | |||||
| output_desc_info.shape_info.dims.push_back(w); | |||||
| output_desc_info.shape_info.dims.push_back(c); | |||||
| output_desc_info.shape_info.dims.push_back(k); | |||||
| if (shape_ranges.size() == 4) { // 4 dims | |||||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[2]); // h:2 | |||||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[3]); // w:3 | |||||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[1]); // c:1 | |||||
| output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[0]); // k:0 | |||||
| } | |||||
| format_result = FORMAT_HWCN; | |||||
| } else { | |||||
| for (size_t j = 0; j < shape.GetDimNum(); j++) { | |||||
| output_desc_info.shape_info.dims.push_back(shape.GetDim(j)); | |||||
| } | |||||
| output_desc_info.shape_info.shape_ranges = shape_ranges; | |||||
| } | |||||
| int64_t tensor_size = 0; | |||||
| (void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | |||||
| output_desc_info.size = static_cast<uint64_t>(tensor_size); | |||||
| output_desc_info.data_type = output_desc->GetDataType(); | |||||
| } | |||||
| Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) { | |||||
| std::vector<ConstGeTensorDescPtr> output_desc_list; | |||||
| GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list contains vaild input desc | |||||
| vector<std::string> out_node_names; | |||||
| (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names); | |||||
| GE_CHECK_NOTNULL(root_graph_item_->GetOutputNode()); | |||||
| auto op_desc = root_graph_item_->GetOutputNode()->op_desc; | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| auto out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||||
| GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size()); | |||||
| for (uint32_t index = 0; index < out_size; ++index) { | |||||
| string output_name; | |||||
| std::vector<std::string> src_name = op_desc->GetSrcName(); | |||||
| std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||||
| if (out_size == out_node_names.size()) { | |||||
| bool contains_colon = out_node_names[index].find(":") != std::string::npos; | |||||
| output_name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]); | |||||
| } else { | |||||
| output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); | |||||
| } | |||||
| InputOutputDescInfo output_desc_info; | |||||
| output_desc_info.name = output_name; | |||||
| uint32_t format_result; | |||||
| CreateOutput(output_desc_list[index], output_desc_info, format_result); | |||||
| output_desc.push_back(output_desc_info); | |||||
| formats.push_back(format_result); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | } | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -83,6 +83,30 @@ class HybridModel { | |||||
| const string &GetModelName() const; | const string &GetModelName() const; | ||||
| Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type); | |||||
| void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order); | |||||
| void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||||
| Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||||
| vector<InputOutputDescInfo> &output_desc, | |||||
| std::vector<uint32_t> &input_formats, | |||||
| std::vector<uint32_t> &outputFormats); | |||||
| Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | |||||
| void CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||||
| Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats); | |||||
| void CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input); | |||||
| void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | |||||
| void SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims, std::vector<std::pair<int64_t, int64_t>> &shape_ranges, | |||||
| InputOutputDescInfo &input); | |||||
| private: | private: | ||||
| friend class HybridModelBuilder; | friend class HybridModelBuilder; | ||||
| friend class HybridModelAsyncExecutor; | friend class HybridModelAsyncExecutor; | ||||
| @@ -101,6 +125,8 @@ class HybridModel { | |||||
| std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; | std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; | ||||
| std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | ||||
| bool is_new_model_desc_ = false; // support aipp | |||||
| // runtime fields | // runtime fields | ||||
| uint32_t device_id_ = 0; | uint32_t device_id_ = 0; | ||||
| uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
| @@ -27,16 +27,41 @@ | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "graph/utils/attr_utils.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| namespace { | namespace { | ||||
| const uint32_t kSubgraphIndex = 0U; | const uint32_t kSubgraphIndex = 0U; | ||||
| const uint32_t kVarOutputIndex = 0U; | const uint32_t kVarOutputIndex = 0U; | ||||
| const uint32_t kAlignment = 32; | |||||
| const int kBytes = 8; | const int kBytes = 8; | ||||
| const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | ||||
| Status SetOutputNameAttr(ComputeGraph &graph) { | |||||
| vector<string> output_names; | |||||
| for (const auto &node : graph.GetDirectNode()) { | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto op_type = op_desc->GetType(); | |||||
| if (op_type == NETOUTPUT) { | |||||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | |||||
| const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||||
| NodePtr in_node = peer_out_anchor->GetOwnerNode(); | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| output_names.push_back(in_node->GetName()); | |||||
| } | |||||
| } | |||||
| } | |||||
| GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names), | |||||
| GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); | |||||
| return FAILED); | |||||
| return SUCCESS; | |||||
| } | |||||
| int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { | int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { | ||||
| int64_t var_size = 0; | int64_t var_size = 0; | ||||
| auto data_type = desc.GetDataType(); | auto data_type = desc.GetDataType(); | ||||
| @@ -939,6 +964,10 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr | |||||
| Status HybridModelBuilder::IndexTaskDefs() { | Status HybridModelBuilder::IndexTaskDefs() { | ||||
| const auto &root_graph = ge_root_model_->GetRootGraph(); | const auto &root_graph = ge_root_model_->GetRootGraph(); | ||||
| if (SetOutputNameAttr(*root_graph) != SUCCESS) { | |||||
| GELOGW("Set output name attr failed."); | |||||
| } | |||||
| for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { | for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { | ||||
| auto &name = it.first; | auto &name = it.first; | ||||
| auto &ge_model = it.second; | auto &ge_model = it.second; | ||||
| @@ -957,7 +986,7 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||||
| // index task defs | // index task defs | ||||
| GELOGD("To index tasks for subgraph: %s", name.c_str()); | GELOGD("To index tasks for subgraph: %s", name.c_str()); | ||||
| unordered_map<int64_t, NodePtr> node_map; | |||||
| std::unordered_map<int64_t, NodePtr> node_map; | |||||
| for (const auto &node : sub_graph->GetDirectNode()) { | for (const auto &node : sub_graph->GetDirectNode()) { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| @@ -19,6 +19,7 @@ | |||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| #include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
| #include "hybrid/node_executor/aicore/aicore_task_builder.h" | #include "hybrid/node_executor/aicore/aicore_task_builder.h" | ||||
| #include "graph/load/new_model_manager/tbe_handle_store.h" | |||||
| using optiling::OpRunInfo; | using optiling::OpRunInfo; | ||||
| @@ -36,6 +37,58 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| auto op_desc_ptr = std::make_shared<OpDesc>(op_desc); | |||||
| GE_CHECK_NOTNULL(op_desc_ptr); | |||||
| auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||||
| rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| void *bin_handle = nullptr; | |||||
| if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | |||||
| GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||||
| rtDevBinary_t binary; | |||||
| std::string json_string; | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | |||||
| GELOGI("Get original type of session_graph_id.")); | |||||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||||
| } else { | |||||
| GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| binary.version = 0; | |||||
| binary.data = tbe_kernel->GetBinData(); | |||||
| binary.length = tbe_kernel->GetBinDataSize(); | |||||
| GELOGI("TBE: binary.length: %lu", binary.length); | |||||
| GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | |||||
| std::string meta_data; | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data), | |||||
| GELOGI("Get original type of json_string")); | |||||
| GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | |||||
| GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | |||||
| kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); | |||||
| } else { | |||||
| GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||||
| kernel_store.ReferTBEHandle(stub_name_.c_str()); | |||||
| } | |||||
| std::string kernel_name; | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name), | |||||
| GELOGI("Get original type of kernel_name")); | |||||
| GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); | |||||
| GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | ||||
| "[%s] Failed to validate task def: [%s]", | "[%s] Failed to validate task def: [%s]", | ||||
| @@ -45,6 +98,9 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | const domi::KernelDef &kernel_def = task_def.kernel(); | ||||
| const domi::KernelContext &context = kernel_def.context(); | const domi::KernelContext &context = kernel_def.context(); | ||||
| stub_name_ = kernel_def.stub_func(); | stub_name_ = kernel_def.stub_func(); | ||||
| GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); | |||||
| GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | ||||
| args_size_ = kernel_def.args_size(); | args_size_ = kernel_def.args_size(); | ||||
| block_dim_ = kernel_def.block_dim(); | block_dim_ = kernel_def.block_dim(); | ||||
| @@ -62,6 +62,7 @@ class AiCoreOpTask { | |||||
| static Status ValidateTaskDef(const domi::TaskDef &task_def); | static Status ValidateTaskDef(const domi::TaskDef &task_def); | ||||
| Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | ||||
| Status InitTilingInfo(const OpDesc &op_desc); | Status InitTilingInfo(const OpDesc &op_desc); | ||||
| Status RegisterTbeHandle(const OpDesc &op_desc); | |||||
| std::string stub_name_; | std::string stub_name_; | ||||
| void *stub_func_ = nullptr; | void *stub_func_ = nullptr; | ||||
| @@ -26,7 +26,7 @@ namespace hybrid { | |||||
| class AiCoreTaskCompiler : public TaskCompiler { | class AiCoreTaskCompiler : public TaskCompiler { | ||||
| public: | public: | ||||
| AiCoreTaskCompiler() = default; | AiCoreTaskCompiler() = default; | ||||
| ~AiCoreTaskCompiler() = default; | |||||
| ~AiCoreTaskCompiler() override = default; | |||||
| Status CompileOp(const NodePtr &node, std::vector<domi::TaskDef> &tasks) override; | Status CompileOp(const NodePtr &node, std::vector<domi::TaskDef> &tasks) override; | ||||
| Status Initialize() override; | Status Initialize() override; | ||||
| @@ -644,8 +644,12 @@ Status AicpuNodeTask::Init(const HybridModel &model) { | |||||
| const auto &context = kernel_def.context(); | const auto &context = kernel_def.context(); | ||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | ||||
| if (kernel_type == ccKernelType::CUST_AI_CPU) { | if (kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed."); | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); | |||||
| bool loaded = false; | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), | |||||
| "load cust aicpu so failed."); | |||||
| if (!loaded) { | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); | |||||
| } | |||||
| } | } | ||||
| GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, | GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, | ||||
| @@ -37,6 +37,8 @@ class AicpuNodeTaskBase : public NodeTask { | |||||
| ~AicpuNodeTaskBase() override = default; | ~AicpuNodeTaskBase() override = default; | ||||
| using NodeTask::Init; | |||||
| virtual Status Init(const HybridModel &model) = 0; | virtual Status Init(const HybridModel &model) = 0; | ||||
| Status UpdateArgs(TaskContext &context) override; | Status UpdateArgs(TaskContext &context) override; | ||||
| @@ -405,7 +405,7 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, | |||||
| auto node_item = model.GetNodeItem(node); | auto node_item = model.GetNodeItem(node); | ||||
| GE_CHECK_NOTNULL(node_item); | GE_CHECK_NOTNULL(node_item); | ||||
| unique_ptr<ControlOpNodeTask> node_task; | |||||
| std::unique_ptr<ControlOpNodeTask> node_task; | |||||
| auto node_type = node->GetType(); | auto node_type = node->GetType(); | ||||
| if (node_type == IF || node_type == STATELESSIF) { | if (node_type == IF || node_type == STATELESSIF) { | ||||
| node_task.reset(new(std::nothrow) IfOpNodeTask()); | node_task.reset(new(std::nothrow) IfOpNodeTask()); | ||||
| @@ -25,6 +25,7 @@ namespace ge { | |||||
| namespace hybrid { | namespace hybrid { | ||||
| class ControlOpNodeTask : public NodeTask { | class ControlOpNodeTask : public NodeTask { | ||||
| public: | public: | ||||
| using NodeTask::Init; | |||||
| virtual Status Init(const NodePtr &node, const HybridModel &model) = 0; | virtual Status Init(const NodePtr &node, const HybridModel &model) = 0; | ||||
| Status UpdateArgs(TaskContext &context) override; | Status UpdateArgs(TaskContext &context) override; | ||||
| @@ -68,7 +68,7 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { | |||||
| node_name_.c_str(), node_type_.c_str(), output_num, input_num); | node_name_.c_str(), node_type_.c_str(), output_num, input_num); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| for (uint32_t out_index = 0; out_index < output_num; ++out_index) { | |||||
| for (uint32_t out_index = 0; out_index < static_cast<uint32_t>(output_num); ++out_index) { | |||||
| auto input = context.GetInput(out_index); | auto input = context.GetInput(out_index); | ||||
| GE_CHECK_NOTNULL(input); | GE_CHECK_NOTNULL(input); | ||||
| GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); | GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | #include "hybrid/node_executor/host_cpu/kernel_factory.h" | ||||
| namespace { | namespace { | ||||
| const size_t kAssignInputNum = 2; | |||||
| const size_t kAssignRefInputIndex = 0; | const size_t kAssignRefInputIndex = 0; | ||||
| const size_t kAssignValueInputIndex = 1; | const size_t kAssignValueInputIndex = 1; | ||||
| const size_t kAssignRefOutputIndex = 0; | const size_t kAssignRefOutputIndex = 0; | ||||
| @@ -34,7 +34,6 @@ const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; | |||||
| const char *const kEngineNameHccl = "ops_kernel_info_hccl"; | const char *const kEngineNameHccl = "ops_kernel_info_hccl"; | ||||
| const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | ||||
| const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; | const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; | ||||
| const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | |||||
| } | } | ||||
| Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { | Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { | ||||
| GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); | GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); | ||||
| @@ -41,7 +41,6 @@ class PartitionedCallNodeTask : public NodeTask { | |||||
| const GraphItem *graph_item_; | const GraphItem *graph_item_; | ||||
| std::unique_ptr<SubgraphExecutor> subgraph_executor_; | std::unique_ptr<SubgraphExecutor> subgraph_executor_; | ||||
| GraphExecutionContext *context_ = nullptr; | |||||
| }; | }; | ||||
| class PartitionedCallNodeExecutor : public NodeExecutor { | class PartitionedCallNodeExecutor : public NodeExecutor { | ||||
| @@ -29,7 +29,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| class GraphExecutionContext; | |||||
| struct GraphExecutionContext; | |||||
| class SubgraphContext; | class SubgraphContext; | ||||
| class TaskContext { | class TaskContext { | ||||
| @@ -11,13 +11,13 @@ set(SRC_LIST | |||||
| "main.cc" | "main.cc" | ||||
| "single_op_parser.cc" | "single_op_parser.cc" | ||||
| "../session/omg.cc" | "../session/omg.cc" | ||||
| "../ir_build/atc_ir_common.cc" | |||||
| "../ir_build/atc_ir_common.cc" | |||||
| ) | ) | ||||
| ############ atc ############ | ############ atc ############ | ||||
| add_executable(atc ${SRC_LIST} ${PROTO_HDRS}) | add_executable(atc ${SRC_LIST} ${PROTO_HDRS}) | ||||
| target_compile_options(atc PRIVATE | |||||
| target_compile_options(atc PRIVATE | |||||
| -Werror | -Werror | ||||
| -O2 | -O2 | ||||
| -Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
| @@ -74,10 +74,130 @@ target_link_libraries(atc PRIVATE | |||||
| -ldl | -ldl | ||||
| ) | ) | ||||
| ############ atc.bin ############ | |||||
| add_executable(atc.bin ${SRC_LIST} ${PROTO_HDRS}) | |||||
| target_compile_options(atc.bin PRIVATE | |||||
| -Werror | |||||
| -O2 | |||||
| -Wno-deprecated-declarations | |||||
| ) | |||||
| target_compile_definitions(atc.bin PRIVATE | |||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||||
| COMPILE_OMG_PACKAGE | |||||
| google=ascend_private | |||||
| ) | |||||
| target_include_directories(atc.bin PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${GE_CODE_DIR}/common/inc/external | |||||
| ${GE_CODE_DIR}/common/inc/external/graph | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/graph | |||||
| ${METADEF_DIR}/inc/register | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${METADEF_DIR}/inc/external/register | |||||
| ${PARSER_DIR} | |||||
| ${CMAKE_BINARY_DIR} | |||||
| ${CMAKE_BINARY_DIR}/proto/ge | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| ${GE_CODE_DIR}/../inc/common | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||||
| ) | |||||
| target_link_libraries(atc.bin PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| ascend_protobuf | |||||
| ge_common | |||||
| register | |||||
| c_sec | |||||
| graph | |||||
| error_manager | |||||
| ge_compiler | |||||
| parser_common | |||||
| gflags | |||||
| json | |||||
| runtime_compile | |||||
| slog | |||||
| static_mmpa | |||||
| -lrt | |||||
| -ldl | |||||
| ) | |||||
| ############ fwk_atc.bin ############ | |||||
| add_executable(fwk_atc.bin ${SRC_LIST} ${PROTO_HDRS}) | |||||
| target_compile_options(fwk_atc.bin PRIVATE | |||||
| -Werror | |||||
| -O2 | |||||
| -Wno-deprecated-declarations | |||||
| ) | |||||
| target_compile_definitions(fwk_atc.bin PRIVATE | |||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||||
| COMPILE_OMG_PACKAGE | |||||
| google=ascend_private | |||||
| ) | |||||
| target_include_directories(fwk_atc.bin PRIVATE | |||||
| ${CMAKE_CURRENT_LIST_DIR} | |||||
| ${GE_CODE_DIR} | |||||
| ${GE_CODE_DIR}/ge | |||||
| ${GE_CODE_DIR}/inc/external | |||||
| ${GE_CODE_DIR}/common/inc/external | |||||
| ${GE_CODE_DIR}/common/inc/external/graph | |||||
| ${GE_CODE_DIR}/inc | |||||
| ${GE_CODE_DIR}/inc/framework | |||||
| ${METADEF_DIR}/inc | |||||
| ${METADEF_DIR}/inc/graph | |||||
| ${METADEF_DIR}/inc/register | |||||
| ${METADEF_DIR}/inc/external | |||||
| ${METADEF_DIR}/inc/external/graph | |||||
| ${METADEF_DIR}/inc/external/register | |||||
| ${PARSER_DIR} | |||||
| ${CMAKE_BINARY_DIR} | |||||
| ${CMAKE_BINARY_DIR}/proto/ge | |||||
| #### yellow zone #### | |||||
| ${GE_CODE_DIR}/../inc | |||||
| ${GE_CODE_DIR}/../inc/common | |||||
| #### blue zone #### | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc | |||||
| ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | |||||
| ) | |||||
| target_link_libraries(fwk_atc.bin PRIVATE | |||||
| $<BUILD_INTERFACE:intf_pub> | |||||
| ascend_protobuf | |||||
| ge_common | |||||
| register | |||||
| c_sec | |||||
| graph | |||||
| error_manager | |||||
| ge_runner | |||||
| parser_common | |||||
| gflags | |||||
| json | |||||
| runtime | |||||
| slog | |||||
| static_mmpa | |||||
| -lrt | |||||
| -ldl | |||||
| ) | |||||
| ############ install ############ | ############ install ############ | ||||
| set(INSTALL_BASE_DIR "") | set(INSTALL_BASE_DIR "") | ||||
| set(INSTALL_LIBRARY_DIR lib) | set(INSTALL_LIBRARY_DIR lib) | ||||
| install(TARGETS atc OPTIONAL | |||||
| install(TARGETS atc atc.bin fwk_atc.bin OPTIONAL | |||||
| LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} | LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} | ||||
| ) | ) | ||||
| @@ -0,0 +1,20 @@ | |||||
#!/bin/bash
#-------------------------------------------------------------------
# Purpose: Launcher for the ATC binaries. Prepends the package's lib64 and
#          python/site-packages directories to LD_LIBRARY_PATH / PYTHONPATH,
#          then runs atc.bin when <package>/bin/atc.bin exists and
#          fwk_atc.bin otherwise, forwarding all command line arguments.
# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved.
#-------------------------------------------------------------------

# Directory of this script and its parent (the package root); quoted so that
# paths containing spaces work.
LOCAL_PATH=$(cd "$(dirname "$0")"; pwd)
PKG_PATH=$(cd "${LOCAL_PATH}/.."; pwd)
LIB_P="/lib64"
PYTHON_P="/python/site-packages"
LIB64_PATH="${PKG_PATH}${LIB_P}"
PYTHON_PATH="${PKG_PATH}${PYTHON_P}"
# Only add the ':' separator when the variable already has a value, so that no
# empty search-path entry is created.
export LD_LIBRARY_PATH="${LIB64_PATH}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PYTHONPATH="${PYTHON_PATH}${PYTHONPATH:+:${PYTHONPATH}}"

# "$@" keeps every argument intact, including arguments that contain spaces.
if [ -f "${PKG_PATH}/bin/atc.bin" ];then
    atc.bin "$@"
else
    fwk_atc.bin "$@"
fi
| @@ -54,3 +54,108 @@ LOCAL_LDFLAGS := -lrt -ldl | |||||
| include $(BUILD_HOST_EXECUTABLE) | include $(BUILD_HOST_EXECUTABLE) | ||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := atc.bin | |||||
| LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private | |||||
| LOCAL_SRC_FILES := \ | |||||
| main.cc \ | |||||
| single_op_parser.cc \ | |||||
| ../session/omg.cc \ | |||||
| ../ir_build/atc_ir_common.cc \ | |||||
| LOCAL_C_INCLUDES := \ | |||||
| $(LOCAL_PATH)/../ ./ \ | |||||
| $(TOPDIR)inc \ | |||||
| $(TOPDIR)metadef/inc \ | |||||
| $(TOPDIR)graphengine/inc \ | |||||
| $(TOPDIR)inc/external \ | |||||
| $(TOPDIR)metadef/inc/external \ | |||||
| $(TOPDIR)graphengine/inc/external \ | |||||
| $(TOPDIR)metadef/inc/external/graph \ | |||||
| $(TOPDIR)graphengine/inc/framework \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| $(TOPDIR)metadef/inc/common/util \ | |||||
| $(TOPDIR)parser \ | |||||
| third_party/json/include \ | |||||
| third_party/gflags/include \ | |||||
| third_party/protobuf/include \ | |||||
| proto/om.proto \ | |||||
| proto/ge_ir.proto \ | |||||
| proto/task.proto \ | |||||
| proto/insert_op.proto \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libge_common \ | |||||
| libascend_protobuf \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| liberror_manager \ | |||||
| libge_compiler \ | |||||
| libruntime_compile \ | |||||
| libparser_common \ | |||||
| liberror_manager \ | |||||
| LOCAL_STATIC_LIBRARIES := libgflags | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| include $(BUILD_HOST_EXECUTABLE) | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := fwk_atc.bin | |||||
| LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private | |||||
| LOCAL_SRC_FILES := \ | |||||
| main.cc \ | |||||
| single_op_parser.cc \ | |||||
| ../session/omg.cc \ | |||||
| ../ir_build/atc_ir_common.cc \ | |||||
| LOCAL_C_INCLUDES := \ | |||||
| $(LOCAL_PATH)/../ ./ \ | |||||
| $(TOPDIR)inc \ | |||||
| $(TOPDIR)metadef/inc \ | |||||
| $(TOPDIR)graphengine/inc \ | |||||
| $(TOPDIR)inc/external \ | |||||
| $(TOPDIR)metadef/inc/external \ | |||||
| $(TOPDIR)graphengine/inc/external \ | |||||
| $(TOPDIR)metadef/inc/external/graph \ | |||||
| $(TOPDIR)graphengine/inc/framework \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| $(TOPDIR)metadef/inc/common/util \ | |||||
| $(TOPDIR)parser \ | |||||
| third_party/json/include \ | |||||
| third_party/gflags/include \ | |||||
| third_party/protobuf/include \ | |||||
| proto/om.proto \ | |||||
| proto/ge_ir.proto \ | |||||
| proto/task.proto \ | |||||
| proto/insert_op.proto \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libge_common \ | |||||
| libascend_protobuf \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| liberror_manager \ | |||||
| libge_runner \ | |||||
| libruntime \ | |||||
| libparser_common \ | |||||
| liberror_manager \ | |||||
| LOCAL_STATIC_LIBRARIES := libgflags | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| include $(BUILD_HOST_EXECUTABLE) | |||||
| @@ -202,7 +202,7 @@ Status CsaInteract::WriteFile(const std::string &file_name, const std::string &c | |||||
| } | } | ||||
| } | } | ||||
| mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length()); | |||||
| mmSsize_t ret = mmWrite(fd, reinterpret_cast<void *>(const_cast<char *>(content.c_str())), content.length()); | |||||
| if (ret == EN_ERROR) { | if (ret == EN_ERROR) { | ||||
| GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); | GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); | ||||
| ret = mmClose(fd); | ret = mmClose(fd); | ||||
| @@ -33,6 +33,8 @@ const std::vector<std::string> kHcclBuilderLibs = { | |||||
| "libhvd_opskernel_builder.so", | "libhvd_opskernel_builder.so", | ||||
| "libhcom_gradtune_opskernel_builder.so" | "libhcom_gradtune_opskernel_builder.so" | ||||
| }; | }; | ||||
| const std::string kAicoreUtilsLib = "libaicore_utils_runtime.so"; | |||||
| } // namespace | } // namespace | ||||
| OpsKernelBuilderManager::~OpsKernelBuilderManager() { | OpsKernelBuilderManager::~OpsKernelBuilderManager() { | ||||
| // it's OK to call Finalize multiply times | // it's OK to call Finalize multiply times | ||||
| @@ -45,13 +47,11 @@ OpsKernelBuilderManager &OpsKernelBuilderManager::Instance() { | |||||
| } | } | ||||
| Status OpsKernelBuilderManager::Initialize(const map<std::string, std::string> &options, bool is_train) { | Status OpsKernelBuilderManager::Initialize(const map<std::string, std::string> &options, bool is_train) { | ||||
| if (is_train) { | |||||
| std::string lib_paths; | |||||
| GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths)); | |||||
| plugin_manager_.reset(new (std::nothrow)PluginManager()); | |||||
| GE_CHECK_NOTNULL(plugin_manager_); | |||||
| GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); | |||||
| } | |||||
| std::string lib_paths; | |||||
| GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths, is_train)); | |||||
| plugin_manager_.reset(new (std::nothrow)PluginManager()); | |||||
| GE_CHECK_NOTNULL(plugin_manager_); | |||||
| GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); | |||||
| auto &kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll(); | auto &kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll(); | ||||
| GELOGI("Number of OpBuild = %zu", kernel_builders.size()); | GELOGI("Number of OpBuild = %zu", kernel_builders.size()); | ||||
| @@ -100,7 +100,8 @@ OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &n | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| Status OpsKernelBuilderManager::GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths) { | |||||
| Status OpsKernelBuilderManager::GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths, | |||||
| bool is_train) { | |||||
| GELOGD("Start to execute GetLibPaths"); | GELOGD("Start to execute GetLibPaths"); | ||||
| std::string path_base = PluginManager::GetPath(); | std::string path_base = PluginManager::GetPath(); | ||||
| std::string so_path = "plugin/opskernel/"; | std::string so_path = "plugin/opskernel/"; | ||||
| @@ -109,6 +110,9 @@ Status OpsKernelBuilderManager::GetLibPaths(const std::map<std::string, std::str | |||||
| for (const auto &lib_name : kBasicBuilderLibs) { | for (const auto &lib_name : kBasicBuilderLibs) { | ||||
| all_lib_paths += (path + lib_name + ":"); | all_lib_paths += (path + lib_name + ":"); | ||||
| } | } | ||||
| if (!is_train) { | |||||
| all_lib_paths += (path_base + kAicoreUtilsLib + ":"); | |||||
| } | |||||
| auto iter = options.find(OPTION_EXEC_HCCL_FLAG); | auto iter = options.find(OPTION_EXEC_HCCL_FLAG); | ||||
| if (iter == options.end() || iter->second != "0") { | if (iter == options.end() || iter->second != "0") { | ||||
| @@ -48,7 +48,7 @@ class OpsKernelBuilderManager { | |||||
| private: | private: | ||||
| OpsKernelBuilderManager() = default; | OpsKernelBuilderManager() = default; | ||||
| static Status GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths); | |||||
| static Status GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths, bool is_train); | |||||
| std::unique_ptr<PluginManager> plugin_manager_; | std::unique_ptr<PluginManager> plugin_manager_; | ||||
| std::map<std::string, OpsKernelBuilderPtr> ops_kernel_builders_{}; | std::map<std::string, OpsKernelBuilderPtr> ops_kernel_builders_{}; | ||||
| @@ -891,7 +891,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con | |||||
| if (status != ge::GRAPH_SUCCESS) { | if (status != ge::GRAPH_SUCCESS) { | ||||
| GELOGE(ge::FAILED, "Om file init failed."); | GELOGE(ge::FAILED, "Om file init failed."); | ||||
| if (model.model_data != nullptr) { | if (model.model_data != nullptr) { | ||||
| delete[](char *) model.model_data; | |||||
| delete[] reinterpret_cast<char *>(model.model_data); | |||||
| model.model_data = nullptr; | model.model_data = nullptr; | ||||
| } | } | ||||
| return status; | return status; | ||||
| @@ -902,7 +902,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con | |||||
| if (status != ge::GRAPH_SUCCESS) { | if (status != ge::GRAPH_SUCCESS) { | ||||
| GELOGE(ge::FAILED, "Get model part failed."); | GELOGE(ge::FAILED, "Get model part failed."); | ||||
| if (model.model_data != nullptr) { | if (model.model_data != nullptr) { | ||||
| delete[](char *) model.model_data; | |||||
| delete[] reinterpret_cast<char *>(model.model_data); | |||||
| model.model_data = nullptr; | model.model_data = nullptr; | ||||
| } | } | ||||
| return status; | return status; | ||||
| @@ -928,7 +928,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con | |||||
| } | } | ||||
| if (model.model_data != nullptr) { | if (model.model_data != nullptr) { | ||||
| delete[](char *) model.model_data; | |||||
| delete[] reinterpret_cast<char *>(model.model_data); | |||||
| model.model_data = nullptr; | model.model_data = nullptr; | ||||
| } | } | ||||
| return ret; | return ret; | ||||
| @@ -62,8 +62,12 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { | |||||
| if (kernel_type == ccKernelType::CUST_AI_CPU) { | if (kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
| task.is_custom_ = true; | task.is_custom_ = true; | ||||
| task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; | task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; | ||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||||
| bool loaded = false; | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), | |||||
| "launch cust aicpu so failed"); | |||||
| if (!loaded) { | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||||
| } | |||||
| } | } | ||||
| task.num_inputs_ = op_desc_->GetInputsSize(); | task.num_inputs_ = op_desc_->GetInputsSize(); | ||||
| @@ -1,3 +1,10 @@ | |||||
| #!/usr/bin/python3.7 | |||||
| # -*- coding: UTF-8 -*- | |||||
| #------------------------------------------------------------------- | |||||
| # Purpose: | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved. | |||||
| #------------------------------------------------------------------- | |||||
| import os | import os | ||||
| import re | import re | ||||
| import sys | import sys | ||||
| @@ -19,8 +19,6 @@ | |||||
| #include "runtime/rt.h" | #include "runtime/rt.h" | ||||
| using namespace std; | |||||
| namespace ge { | namespace ge { | ||||
| #define CC_FUSION_OP_MAX 32 | #define CC_FUSION_OP_MAX 32 | ||||
| @@ -234,6 +234,22 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, | ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data, | ||||
| ge::RunModelData &output_data, bool async_mode = false); | ge::RunModelData &output_data, bool async_mode = false); | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Synchronous execution of offline model(Do not create thread) | |||||
| /// @param [in] uint32_t model_id: Model ID to execute | |||||
| /// @param [in] void* stream: stream to execute | |||||
| /// @param [in] bool async_mode: whether to run in asynchronous mode. | |||||
| /// @param [in] const ge::RunModelData &run_input_data: Model input data | |||||
| /// @param [in] const std::vector<GeTensorDesc> &input_desc: description of model input data | |||||
| /// @param [out] ge::RunModelData &run_output_data: Model output data | |||||
| /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data | |||||
| /// @return SUCCESS handle successfully / others handle failed | |||||
| /// | |||||
| ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | |||||
| const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data, | |||||
| std::vector<GeTensorDesc> &output_desc, bool async_mode = false); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get weight memory size from model file | /// @brief Get weight memory size from model file | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit 29c31bb87d8bbe6904ab6fa72034a803fb50a746 | |||||
| Subproject commit 5b9a7f84a4347f8816d492aa51f2414ccf8a0744 | |||||
| @@ -1 +1 @@ | |||||
| Subproject commit ba956d349d8ad3e864d27467f4f0119333cbadc6 | |||||
| Subproject commit 70369668abebed84942d9f355494a89e82cc1eac | |||||
| @@ -1,42 +0,0 @@ | |||||
| # Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| cmake_minimum_required(VERSION 3.0) | |||||
| set(CMAKE_CXX_STANDARD 11) | |||||
| project(ge_st CXX C) | |||||
| set(CMAKE_CXX_FLAGS "-O1 -fPIC -Wl,-unresolved-symbols=ignore-in-shared-libs") | |||||
| file(GLOB_RECURSE RES50_TRAIN_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| "resnet50/resnet50_train.cc" | |||||
| "resnet50/common.cc" | |||||
| ) | |||||
| include_directories(${GE_SOURCE_DIR}/inc) | |||||
| include_directories(${GE_SOURCE_DIR}/inc/graph) | |||||
| include_directories(${GE_SOURCE_DIR}/inc/framework) | |||||
| include_directories(${GE_SOURCE_DIR}/inc/external) | |||||
| include_directories(${GE_SOURCE_DIR}/inc/external/ge) | |||||
| include_directories(${GE_SOURCE_DIR}/inc/external/graph) | |||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) | |||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/ops) | |||||
| include_directories(/usr/local/HiAI/opp/op_proto/built-in/inc) | |||||
| add_executable(st_resnet50_train ${RES50_TRAIN_SRCS}) | |||||
| target_link_libraries(st_resnet50_train | |||||
| ${PROTOBUF_LIBRARY} | |||||
| ge_client_train ge_memory | |||||
| ) | |||||
| @@ -1,768 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <math.h> | |||||
| #include <stdint.h> | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #include <iostream> | |||||
| #include <vector> | |||||
| #include "common.h" | |||||
| #include "model.h" | |||||
| #define MAX_HEAD_SIZE 50 | |||||
| using namespace std; | |||||
| using namespace ge; | |||||
| void update_op_format(Operator ops, Format format) { | |||||
| printf("set format begin.........\n"); | |||||
| ge::TensorDesc tensor_desc_x = ops.GetInputDesc("x"); | |||||
| ge::TensorDesc tensor_desc_y = ops.GetOutputDesc("y"); | |||||
| Format f_x0 = tensor_desc_x.GetFormat(); | |||||
| Format f_y0 = tensor_desc_x.GetFormat(); | |||||
| printf("before set x format:%d \n", f_x0); | |||||
| printf("before set y format:%d \n", f_y0); | |||||
| printf("format to be set is :%d \n", format); | |||||
| tensor_desc_x.SetFormat(format); | |||||
| tensor_desc_y.SetFormat(format); | |||||
| ops.UpdateInputDesc("x", tensor_desc_x); | |||||
| ops.UpdateOutputDesc("y", tensor_desc_y); | |||||
| Format f_x = tensor_desc_x.GetFormat(); | |||||
| Format f_y = tensor_desc_y.GetFormat(); | |||||
| printf("after set x format:%d \n", f_x); | |||||
| printf("after set y format:%d \n", f_y); | |||||
| } | |||||
| /// getDimInfo: get dim info from data file | |||||
| /// param: | |||||
| /// fp: the testing datafile object | |||||
| /// | |||||
| /// return : | |||||
| /// dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3,162(3*3*6*3)],4 is dim size,3,3,6,3 is the | |||||
| /// dim shape data_size: the size of the testing data including the data file | |||||
| void getDimInfo(FILE *fp, std::vector<uint64_t> &dim_info) { | |||||
| // get dim info from hisi testing data file | |||||
| uint32_t *dim_buffer = (uint32_t *)malloc(MAX_HEAD_SIZE * sizeof(uint32_t)); | |||||
| fread(dim_buffer, sizeof(uint32_t), MAX_HEAD_SIZE, fp); | |||||
| dim_info.push_back(*dim_buffer); // get dim size | |||||
| // get data shape to compute the datasize | |||||
| uint64_t data_size = 1; | |||||
| uint32_t i = 1; | |||||
| for (; i <= dim_info[0]; i++) { | |||||
| dim_info.push_back(*(dim_buffer + i)); | |||||
| data_size *= *(dim_buffer + i); | |||||
| } | |||||
| dim_info.push_back(data_size); | |||||
| free(dim_buffer); | |||||
| } | |||||
| /// readTestDataFile: read test date from hisi .t datafile | |||||
| /// param: | |||||
| /// infile: the path of hisi .t datafile | |||||
| /// return: | |||||
| /// dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3],4 is dim size,3,3,6,3 is the dim shape | |||||
| void *readTestDataFile(std::string infile, std::vector<uint64_t> &dim_info) { | |||||
| FILE *fp; | |||||
| fp = fopen(infile.c_str(), "r"); | |||||
| if (fp == NULL) { | |||||
| printf("ERROR: cant't open file %s\n", infile.c_str()); | |||||
| return NULL; | |||||
| } else { | |||||
| getDimInfo(fp, dim_info); | |||||
| uint64_t data_size = dim_info[dim_info.size() - 1]; | |||||
| fclose(fp); | |||||
| fp = fopen(infile.c_str(), "r"); | |||||
| if (fp == NULL) { | |||||
| printf("ERROR: cant't open file %s\n", infile.c_str()); | |||||
| return NULL; | |||||
| } | |||||
| uint32_t *memory = (uint32_t *)malloc((dim_info[0] + 1 + data_size) * sizeof(uint32_t)); | |||||
| fread(memory, sizeof(uint32_t), (dim_info[0] + 1 + data_size), fp); | |||||
| fclose(fp); | |||||
| return memory + (dim_info[0] + 1); | |||||
| } | |||||
| } | |||||
/// readUint8TestDataFile: read the first `size` bytes of a file
/// param:
///   infile: the path of the data file
///   size:   number of bytes to read, must be positive
/// return:
///   malloc'ed buffer holding exactly `size` bytes (release it with free), or
///   NULL when size is not positive, the file cannot be opened, the buffer
///   cannot be allocated, or the file holds fewer than `size` bytes.
void *readUint8TestDataFile(std::string infile, int size) {
  if (size <= 0) {
    printf("ERROR: invalid read size %d for file %s\n", size, infile.c_str());
    return NULL;
  }
  FILE *fp = fopen(infile.c_str(), "rb");
  if (fp == NULL) {
    printf("ERROR: cant't open file %s\n", infile.c_str());
    return NULL;
  }

  const size_t byte_count = static_cast<size_t>(size);
  uint8_t *memory = (uint8_t *)malloc(byte_count);
  if (memory == NULL) {
    printf("ERROR: failed to allocate %d bytes for file %s\n", size, infile.c_str());
    fclose(fp);
    return NULL;
  }

  const size_t bytes_read = fread(memory, sizeof(uint8_t), byte_count, fp);
  fclose(fp);
  if (bytes_read != byte_count) {
    // A partially filled buffer would hand uninitialised bytes to the caller.
    printf("ERROR: file %s holds fewer than %d bytes\n", infile.c_str(), size);
    free(memory);
    return NULL;
  }
  return memory;
}
/// allclose: element-wise closeness check of two float buffers
/// Two finite values are close when |a[i] - b[i]| <= atol + rtol * |b[i]|.
/// When either value is NaN or infinite the pair is close only if the two
/// values compare equal, so NaN never matches and an infinity only matches
/// the same infinity.
/// param:
///   a: compared data a
///   b: compared data b (the reference the relative tolerance is scaled by)
///   count: the number of elements to compare
///   rtol: relative tolerance
///   atol: absolute tolerance
/// return:
///   true when every compared pair is close (always true for count == 0);
///   false at the first mismatch, or when a buffer is NULL and count > 0
bool allclose(float *a, float *b, uint64_t count, float rtol = 1e-05, float atol = 1e-08) {
  if (count == 0) {
    return true;
  }
  if (a == NULL || b == NULL) {
    printf("compara failed: null input buffer\n");
    return false;
  }
  // The index is as wide as count; a 32-bit index would wrap and never reach
  // counts above UINT32_MAX.
  for (uint64_t i = 0; i < count; ++i) {
    bool is_close;
    if (isfinite(a[i]) && isfinite(b[i])) {
      is_close = !(fabs(a[i] - b[i]) > (atol + rtol * fabs(b[i])));
    } else {
      is_close = (a[i] == b[i]);
    }
    if (!is_close) {
      printf("compara failed: i= %llu, a[i]=%f, b[i]=%f,atol=%f,rtol=%f\n", static_cast<unsigned long long>(i), a[i],
             b[i], atol, rtol);
      return false;
    }
  }
  return true;
}
| /// compFp32WithTData: compare the data with the data in hisi .t file | |||||
| /// param: | |||||
| /// actual_output_data: the result of ge | |||||
| /// expected_data_file: the path of hisi .t result file | |||||
| /// rtol: | |||||
| /// atol: | |||||
| /// return: | |||||
| /// true of false | |||||
| bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol = 1e-05, float atol = 1e-08) { | |||||
| std::vector<uint64_t> dim_info; | |||||
| float *expected_output_data = (float *)readTestDataFile(expected_data_file, dim_info); | |||||
| uint32_t i = 1; | |||||
| uint64_t data_size = 1; | |||||
| for (; i <= dim_info[0]; i++) { | |||||
| data_size *= dim_info[i]; | |||||
| } | |||||
| return allclose(actual_output_data, expected_output_data, data_size, rtol, atol); | |||||
| } | |||||
| int SwitchDatatype(DataType dt) { | |||||
| int size = 1; | |||||
| if (dt == ge::DT_FLOAT) size = 4; | |||||
| if (dt == ge::DT_INT32) size = 4; | |||||
| if (dt == ge::DT_FLOAT16) size = 2; | |||||
| if (dt == ge::DT_INT64) size = 8; | |||||
| return size; | |||||
| } | |||||
| ge::Tensor genTensor(std::vector<int64_t> tensor_shape, Format format, DataType dt) { | |||||
| int size = 1; | |||||
| for (int i = 0; i < tensor_shape.size(); i++) { | |||||
| size = size * tensor_shape[i]; | |||||
| } | |||||
| int data_type_size = SwitchDatatype(dt); | |||||
| size = abs(size * data_type_size); | |||||
| vector<uint8_t> data_value; | |||||
| if (size == 0) { | |||||
| TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); | |||||
| input_tensor_desc.SetRealDimCnt(tensor_shape.size()); | |||||
| Tensor gen_tensor = Tensor(input_tensor_desc, data_value); | |||||
| return gen_tensor; | |||||
| } | |||||
| for (int i = 0; i < size; i++) { | |||||
| data_value.push_back(1); | |||||
| } | |||||
| TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt); | |||||
| input_tensor_desc.SetRealDimCnt(tensor_shape.size()); | |||||
| Tensor gen_tensor = Tensor(input_tensor_desc, data_value); | |||||
| return gen_tensor; | |||||
| } | |||||
| ge::Tensor genTensor_withVaule(std::vector<int64_t> tensor_shape, float value) { | |||||
| int size = 1; | |||||
| for (int i = 0; i < tensor_shape.size(); i++) { | |||||
| size = size * tensor_shape[i]; | |||||
| } | |||||
| float *data_value = new float[size]; | |||||
| for (int i = 0; i < size; i++) { | |||||
| *(data_value + i) = value; | |||||
| } | |||||
| Tensor gen_ge_tensor; | |||||
| TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), FORMAT_NCHW); | |||||
| gen_ge_tensor.SetTensorDesc(input_tensor_desc); | |||||
| gen_ge_tensor.SetData((uint8_t *)data_value, size * 4); | |||||
| return gen_ge_tensor; | |||||
| } | |||||
| Tensor genTesnor_Shape_as_data(std::vector<int64_t> tensor_shape) { | |||||
| Format format = FORMAT_NCHW; | |||||
| DataType dt = DT_INT32; | |||||
| int size = tensor_shape.size(); | |||||
| int32_t *tensor_data = new int32_t[size]; | |||||
| std::cout << "shape tensor size:" << size << endl; | |||||
| for (int i = 0; i < size; i++) { | |||||
| *(tensor_data + i) = tensor_shape[i]; | |||||
| } | |||||
| Tensor gen_tensor; | |||||
| TensorDesc input_tensor_desc = TensorDesc(ge::Shape({size}), FORMAT_NCHW, DT_INT32); | |||||
| gen_tensor.SetData((uint8_t *)tensor_data, size * GetDatTypeSize(dt)); | |||||
| gen_tensor.SetTensorDesc(input_tensor_desc); | |||||
| return gen_tensor; | |||||
| } | |||||
| /// train_flag is 0 when infer; train_flag is 1 when train; train_flag is 0 default | |||||
| /// run_mode_path is not 0,1,2 when TBE; run_mode_path is 1 when FE; run_mode_path is 0 default | |||||
| /// run_mode_path is 2 now when AICPU, ge.enabledlocalFmkop is 1 | |||||
| ge::Status GEInitialize_api(string train_flag, string run_mode_path) { | |||||
| ge::Status ret; | |||||
| if (run_mode_path == "0") { | |||||
| const std::map<string, string> config = { | |||||
| {"device_id", "0,2,4,6"}, | |||||
| {"rank_table_file", "hccl from csa/paas"}, | |||||
| {"ge.graphRunMode", train_flag}, | |||||
| {"ge.aicpuFlag", "1"}, | |||||
| {"ge.feFlag", "1"}, | |||||
| {DDK_VERSION_FLAG, "1.60.T17.B830"}, | |||||
| {"ge.soLoadPath", | |||||
| "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" | |||||
| "libaicpu_plugin.so"}}; | |||||
| ret = ge::GEInitialize(config); | |||||
| } else if (run_mode_path == "1") { | |||||
| const std::map<string, string> config = { | |||||
| {"device_id", "0,2,4,6"}, | |||||
| {"rank_table_file", "hccl from csa/paas"}, | |||||
| {"ge.graphRunMode", train_flag}, | |||||
| {"ge.feFlag", "1"}, | |||||
| {DDK_VERSION_FLAG, "1.60.T17.B830"}, | |||||
| {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/bert"}, | |||||
| {"ge.soLoadPath", "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so"}}; | |||||
| ret = ge::GEInitialize(config); | |||||
| } else if (run_mode_path == "2") { | |||||
| const std::map<string, string> config = {{"device_id", "0,2,4,6"}, | |||||
| {"rank_table_file", "hccl from csa/paas"}, | |||||
| {"ge.graphRunMode", train_flag}, | |||||
| {LOCAL_FMKOP_FLAG, "1"}}; | |||||
| ret = ge::GEInitialize(config); | |||||
| } else { | |||||
| const std::map<string, string> config = { | |||||
| {"device_id", "0,2,4,6"}, | |||||
| {"rank_table_file", "hccl from csa/paas"}, | |||||
| {"ge.graphRunMode", train_flag}, | |||||
| {DDK_VERSION_FLAG, "1.60.T17.B830"}, | |||||
| {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + run_mode_path}}; | |||||
| ret = ge::GEInitialize(config); | |||||
| } | |||||
| std::cout << "GEInitialize_ret is " << ret << std::endl; | |||||
| return ret; | |||||
| } | |||||
| /// train_flag is infer default | |||||
| /// run_mode: is multi group of [fe,aicpu,bert,deeplabv3,mobilenetv2,single_path_nas,ssd] | |||||
| /// but bert,deeplabv3,mobilenetv2,single_path_nas,ssd can only set one value from array | |||||
| /// eg:"fe,aicpu,bert" or "fe", default is “fe” | |||||
| /// "fe,aicpu,bert" remain open fe aicpu and bert | |||||
| ge::Status GEInitialize_api_new(string train_flag, string run_mode) { | |||||
| ge::Status ret; | |||||
| vector<string> modes; | |||||
| char *strs = new char[run_mode.length() + 1]; | |||||
| strcpy(strs, run_mode.c_str()); | |||||
| const char *delim = ","; | |||||
| char *p = strtok(strs, delim); | |||||
| while (p) { | |||||
| string s = p; // transform substr to string | |||||
| modes.push_back(s); // save to result array | |||||
| p = strtok(NULL, delim); | |||||
| } | |||||
| std::map<string, string> config = { | |||||
| {"device_id", "0,2,4,6"}, | |||||
| {"rank_table_file", "hccl from csa/paas"}, | |||||
| {DDK_VERSION_FLAG, "1.60.T17.B830"}, | |||||
| {"ge.opsProtoLibPath", "/usr/local/HiAI/runtime/ops/op_proto/built-in/libopsproto.so"}}; | |||||
| if (train_flag == "infer") | |||||
| config.insert(pair<string, string>("ge.graphRunMode", "0")); | |||||
| else if (train_flag == "train") | |||||
| config.insert(pair<string, string>("ge.graphRunMode", "1")); | |||||
| else | |||||
| std::cout << "GeInitialize give the error param" << std::endl; | |||||
| for (int i = 0; i < modes.size(); i++) { | |||||
| if (modes[i] == "fe") { | |||||
| config.insert(pair<string, string>("ge.feFlag", "1")); | |||||
| if (config.find("ge.soLoadPath") != config.end()) { | |||||
| config["ge.soLoadPath"] = | |||||
| "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" | |||||
| "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" | |||||
| "runtime/lib64/plugin/opskernel/librts_engine.so"; | |||||
| } else { | |||||
| config.insert(pair<string, string>( | |||||
| "ge.soLoadPath", | |||||
| "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" | |||||
| "libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); | |||||
| } | |||||
| } else if (modes[i] == "aicpu") { | |||||
| config.insert(pair<string, string>("ge.aicpuFlag", "1")); | |||||
| if (config.find("ge.soLoadPath") != config.end()) { | |||||
| config["ge.soLoadPath"] = | |||||
| "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/" | |||||
| "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/" | |||||
| "runtime/lib64/plugin/opskernel/librts_engine.so"; | |||||
| } else { | |||||
| config.insert(pair<string, string>( | |||||
| "ge.soLoadPath", | |||||
| "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/" | |||||
| "opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so")); | |||||
| } | |||||
| } else if (modes[i] == "bert" || modes[i] == "deeplabv3" || modes[i] == "mobilenetv2" || | |||||
| modes[i] == "single_path_nas" || modes[i] == "ssd") { | |||||
| config.insert(pair<string, string>(TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + modes[i])); | |||||
| } else if (modes[i] == "plugin") { | |||||
| } else | |||||
| std::cout << "GeInitialize give the error param" << std::endl; | |||||
| } | |||||
| ret = ge::GEInitialize(config); | |||||
| std::cout << "GEInitialize_ret is " << ret << std::endl; | |||||
| return ret; | |||||
| } | |||||
| ge::Status GEFinalize_api() { | |||||
| ge::Status ret = ge::GEFinalize(); | |||||
| std::cout << "GEFinalize ret is " << ret << std::endl; | |||||
| return ret; | |||||
| } | |||||
| /// set train_flag | |||||
| /// if run_mode_path is "fe" remain FE process; "fe,plugin" is FE and TBE plugin process | |||||
| /// "aicpu" is open aicpu plugin | |||||
| int RunGraph_initData(Graph &graph, string op_name, map<string, std::vector<int64_t>> attr_test, string train_flag, | |||||
| string run_mode_path) { | |||||
| std::map<string, string> options = {{RUN_FLAG, "1"}}; | |||||
| uint32_t graph_id = 0; | |||||
| ge::Status ret = GEInitialize_api_new(train_flag, run_mode_path); | |||||
| EXPECT_EQ(ret, ge::SUCCESS); | |||||
| ge::Session *session = new Session(options); | |||||
| ASSERT_TRUE(session != NULL); | |||||
| std::vector<Tensor> input; | |||||
| if (attr_test.find("input1") != attr_test.end()) { | |||||
| Tensor input_tensor = genTensor(attr_test["input1"]); | |||||
| input.push_back(input_tensor); | |||||
| } | |||||
| if (attr_test.find("input2") != attr_test.end()) { | |||||
| Tensor input_tensor = genTensor(attr_test["input2"]); | |||||
| input.push_back(input_tensor); | |||||
| } | |||||
| if (attr_test.find("input3") != attr_test.end()) { | |||||
| Tensor input_tensor = genTensor(attr_test["input3"]); | |||||
| input.push_back(input_tensor); | |||||
| } | |||||
| std::vector<Tensor> output; | |||||
| ret = session->AddGraph(graph_id, graph); | |||||
| EXPECT_EQ(ret, ge::SUCCESS); | |||||
| if (train_flag == "1") { | |||||
| setenv("GE_TRAIN", "1", true); | |||||
| ret = session->RunGraph(graph_id, input, output); | |||||
| setenv("GE_TRAIN", "0", true); | |||||
| } else { | |||||
| ret = session->RunGraph(graph_id, input, output); | |||||
| } | |||||
| delete session; | |||||
| GEFinalize_api(); | |||||
| if (ret != ge::SUCCESS) { | |||||
| std::cout << " run graph failed" << std::endl; | |||||
| return -1; | |||||
| } else { | |||||
| return 0; | |||||
| } | |||||
| } | |||||
| ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graph_id, Graph &graph, std::vector<Tensor> inputs, | |||||
| std::vector<Tensor> &outputs) { | |||||
| ge::Status ret = session->AddGraph(graph_id, graph); | |||||
| EXPECT_EQ(ret, ge::SUCCESS); | |||||
| ret = session->RunGraph(graph_id, inputs, outputs); | |||||
| return ret; | |||||
| } | |||||
| ge::Session *create_session() { | |||||
| // Init session | |||||
| std::map<string, string> options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; | |||||
| ge::Session *session = new Session(options); | |||||
| ASSERT_TRUE(session != NULL); | |||||
| return session; | |||||
| } | |||||
| ge::Session *create_aipp_session() { | |||||
| // Init session | |||||
| std::map<string, string> options = {{"a", "b"}, {TRAIN_FLAG, "1"}, {"ge.insertOpFile", "/root/host/ge/aipp.cfg"}}; | |||||
| ge::Session *session = new Session(options); | |||||
| ASSERT_TRUE(session != NULL); | |||||
| return session; | |||||
| } | |||||
| int buildCheckPointGraph(Graph &graph, map<string, TensorDesc> variables) { | |||||
| std::vector<Operator> inputs{}; | |||||
| std::vector<Operator> outputs{}; | |||||
| for (map<string, TensorDesc>::iterator it = variables.begin(); it != variables.end(); ++it) { | |||||
| auto var = op::Variable(string(it->first)); | |||||
| var.update_output_desc_y(it->second); | |||||
| inputs.push_back(var); | |||||
| graph.AddOp(var); | |||||
| } | |||||
| auto save = op::Save().create_dynamic_input_tensors(inputs.size()); | |||||
| for (int i = 0; i < inputs.size(); i++) { | |||||
| save.set_dynamic_input_tensors(i, inputs[i]); | |||||
| } | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| return 0; | |||||
| } | |||||
| int buildInitGraph(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var, | |||||
| std::vector<float> values_var) { | |||||
| std::vector<Operator> inputs{}; | |||||
| std::vector<Operator> outputs{}; | |||||
| for (int i = 0; i < desc_var.size(); i++) { | |||||
| desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); | |||||
| auto tensor_data = genTensor_withVaule(desc_var[i].GetShape().GetDims(), values_var[i]); | |||||
| auto var_constant = op::Constant().set_attr_value(tensor_data); | |||||
| var_constant.update_output_desc_y(desc_var[i]); | |||||
| auto var_init = op::Variable(string(name_var[i])); | |||||
| var_init.update_output_desc_y(desc_var[i]); | |||||
| auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); | |||||
| inputs.push_back(var_init); | |||||
| } | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| return 0; | |||||
| } | |||||
| int buildInitGraph_other_dataType(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var) { | |||||
| std::vector<Operator> inputs{}; | |||||
| std::vector<Operator> outputs{}; | |||||
| for (int i = 0; i < desc_var.size(); i++) { | |||||
| desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum()); | |||||
| auto tensor_data = genTensor(desc_var[i].GetShape().GetDims(), desc_var[i].GetFormat(), desc_var[i].GetDataType()); | |||||
| auto var_constant = op::Constant().set_attr_value(tensor_data); | |||||
| var_constant.update_output_desc_y(desc_var[i]); | |||||
| auto var_init = op::Variable(string(name_var[i])); | |||||
| var_init.update_output_desc_y(desc_var[i]); | |||||
| auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant); | |||||
| inputs.push_back(var_init); | |||||
| graph.AddOp(var_constant); | |||||
| graph.AddOp(var_init); | |||||
| graph.AddOp(var_assign); | |||||
| } | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| return 0; | |||||
| } | |||||
| bool build_multi_input_multi_output_graph(Graph &graph) { | |||||
| auto data1 = op::Data("Data1").set_attr_index(0); | |||||
| auto data2 = op::Data("Data2").set_attr_index(1); | |||||
| vector<uint64_t> dim_info; | |||||
| auto relu1 = op::Relu("Relu1").set_input_x(data1); | |||||
| auto relu2 = op::Relu("Relu2").set_input_x(data2); | |||||
| auto eltwise = op::Eltwise("Eltwise") | |||||
| .create_dynamic_input_x(2) | |||||
| .set_dynamic_input_x(0, relu1) | |||||
| .set_dynamic_input_x(1, relu2) | |||||
| .set_attr_N(2) | |||||
| .set_attr_mode(1) | |||||
| .set_attr_coeff({1, 1}); | |||||
| auto eltwise1 = op::Eltwise("Eltwise1") | |||||
| .create_dynamic_input_x(2) | |||||
| .set_dynamic_input_x(0, eltwise) | |||||
| .set_dynamic_input_x(1, eltwise) | |||||
| .set_attr_N(2) | |||||
| .set_attr_mode(1) | |||||
| .set_attr_coeff({1, 1}); | |||||
| auto eltwise2 = op::Eltwise("Eltwise2") | |||||
| .create_dynamic_input_x(2) | |||||
| .set_dynamic_input_x(0, eltwise) | |||||
| .set_dynamic_input_x(1, eltwise) | |||||
| .set_attr_N(2) | |||||
| .set_attr_mode(1) | |||||
| .set_attr_coeff({1, 1}); | |||||
| std::vector<Operator> inputs{data1, data2}; | |||||
| std::vector<Operator> outputs{eltwise1, eltwise2}; | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| return true; | |||||
| } | |||||
| void build_big_graph(Graph &graph, map<string, std::vector<int64_t>> attr) { | |||||
| auto data = op::Data("Data").set_attr_index(0); | |||||
| auto weight = op::Const("weight1").set_attr_value(genTensor(attr["weight"])); | |||||
| vector<int64_t> weight_shape(attr["weight"].begin(), attr["weight"].end()); | |||||
| TensorDesc weight_desc(ge::Shape(weight_shape), FORMAT_NCHW, DT_FLOAT); | |||||
| weight.update_output_desc_y(weight_desc); | |||||
| auto conv_1 = op::Conv2D("conv1").set_input_x(data).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_2 = op::Conv2D("conv2").set_input_x(conv_1).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_3 = op::Conv2D("conv3").set_input_x(conv_2).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_4 = op::Conv2D("conv4").set_input_x(conv_3).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_5 = op::Conv2D("conv5").set_input_x(conv_4).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_6 = op::Conv2D("conv6").set_input_x(conv_5).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_7 = op::Conv2D("conv7").set_input_x(conv_6).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_8 = op::Conv2D("conv8").set_input_x(conv_7).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_9 = op::Conv2D("conv9").set_input_x(conv_8).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_10 = op::Conv2D("conv10").set_input_x(conv_9).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_11 = op::Conv2D("conv11").set_input_x(conv_10).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_12 = op::Conv2D("conv12").set_input_x(conv_11).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_13 = op::Conv2D("conv13").set_input_x(conv_12).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_14 = op::Conv2D("conv14").set_input_x(conv_13).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_15 = op::Conv2D("conv15").set_input_x(conv_14).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_16 = op::Conv2D("conv16").set_input_x(conv_15).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_17 = op::Conv2D("conv17").set_input_x(conv_16).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_18 = op::Conv2D("conv18").set_input_x(conv_17).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_19 = op::Conv2D("conv19").set_input_x(conv_18).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_20 = op::Conv2D("conv20").set_input_x(conv_19).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_21 = op::Conv2D("conv21").set_input_x(conv_20).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_22 = op::Conv2D("conv22").set_input_x(conv_21).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_23 = op::Conv2D("conv23").set_input_x(conv_22).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_24 = op::Conv2D("conv24").set_input_x(conv_23).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_25 = op::Conv2D("conv25").set_input_x(conv_24).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_26 = op::Conv2D("conv26").set_input_x(conv_25).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_27 = op::Conv2D("conv27").set_input_x(conv_26).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_28 = op::Conv2D("conv28").set_input_x(conv_27).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_29 = op::Conv2D("conv29").set_input_x(conv_28).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_30 = op::Conv2D("conv30").set_input_x(conv_29).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_31 = op::Conv2D("conv31").set_input_x(conv_30).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_32 = op::Conv2D("conv32").set_input_x(conv_31).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_33 = op::Conv2D("conv33").set_input_x(conv_32).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_34 = op::Conv2D("conv34").set_input_x(conv_33).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_35 = op::Conv2D("conv35").set_input_x(conv_34).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_36 = op::Conv2D("conv36").set_input_x(conv_35).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_37 = op::Conv2D("conv37").set_input_x(conv_36).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_38 = op::Conv2D("conv38").set_input_x(conv_37).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_39 = op::Conv2D("conv39").set_input_x(conv_38).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_40 = op::Conv2D("conv40").set_input_x(conv_39).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_41 = op::Conv2D("conv41").set_input_x(conv_40).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_42 = op::Conv2D("conv42").set_input_x(conv_41).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_43 = op::Conv2D("conv43").set_input_x(conv_42).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_44 = op::Conv2D("conv44").set_input_x(conv_43).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_45 = op::Conv2D("conv45").set_input_x(conv_44).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_46 = op::Conv2D("conv46").set_input_x(conv_45).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_47 = op::Conv2D("conv47").set_input_x(conv_46).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_48 = op::Conv2D("conv48").set_input_x(conv_47).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_49 = op::Conv2D("conv49").set_input_x(conv_48).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_50 = op::Conv2D("conv50").set_input_x(conv_49).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_51 = op::Conv2D("conv51").set_input_x(conv_50).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_52 = op::Conv2D("conv52").set_input_x(conv_51).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_53 = op::Conv2D("conv53").set_input_x(conv_52).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_54 = op::Conv2D("conv54").set_input_x(conv_53).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_55 = op::Conv2D("conv55").set_input_x(conv_54).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_56 = op::Conv2D("conv56").set_input_x(conv_55).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_57 = op::Conv2D("conv57").set_input_x(conv_56).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_58 = op::Conv2D("conv58").set_input_x(conv_57).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_59 = op::Conv2D("conv59").set_input_x(conv_58).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_60 = op::Conv2D("conv60").set_input_x(conv_59).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_61 = op::Conv2D("conv61").set_input_x(conv_60).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_62 = op::Conv2D("conv62").set_input_x(conv_61).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_63 = op::Conv2D("conv63").set_input_x(conv_62).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_64 = op::Conv2D("conv64").set_input_x(conv_63).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_65 = op::Conv2D("conv65").set_input_x(conv_64).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_66 = op::Conv2D("conv66").set_input_x(conv_65).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_67 = op::Conv2D("conv67").set_input_x(conv_66).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_68 = op::Conv2D("conv68").set_input_x(conv_67).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_69 = op::Conv2D("conv69").set_input_x(conv_68).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_70 = op::Conv2D("conv70").set_input_x(conv_69).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_71 = op::Conv2D("conv71").set_input_x(conv_70).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_72 = op::Conv2D("conv72").set_input_x(conv_71).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_73 = op::Conv2D("conv73").set_input_x(conv_72).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_74 = op::Conv2D("conv74").set_input_x(conv_73).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_75 = op::Conv2D("conv75").set_input_x(conv_74).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_76 = op::Conv2D("conv76").set_input_x(conv_75).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_77 = op::Conv2D("conv77").set_input_x(conv_76).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_78 = op::Conv2D("conv78").set_input_x(conv_77).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_79 = op::Conv2D("conv79").set_input_x(conv_78).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_80 = op::Conv2D("conv80").set_input_x(conv_79).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_81 = op::Conv2D("conv81").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_82 = op::Conv2D("conv82").set_input_x(conv_81).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_83 = op::Conv2D("conv83").set_input_x(conv_82).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_84 = op::Conv2D("conv84").set_input_x(conv_83).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_85 = op::Conv2D("conv85").set_input_x(conv_84).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_86 = op::Conv2D("conv86").set_input_x(conv_85).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_87 = op::Conv2D("conv87").set_input_x(conv_86).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_88 = op::Conv2D("conv88").set_input_x(conv_87).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_89 = op::Conv2D("conv89").set_input_x(conv_88).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_90 = op::Conv2D("conv90").set_input_x(conv_89).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_91 = op::Conv2D("conv91").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_92 = op::Conv2D("conv92").set_input_x(conv_91).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_93 = op::Conv2D("conv93").set_input_x(conv_92).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_94 = op::Conv2D("conv94").set_input_x(conv_93).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_95 = op::Conv2D("conv95").set_input_x(conv_94).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_96 = op::Conv2D("conv96").set_input_x(conv_95).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_97 = op::Conv2D("conv97").set_input_x(conv_96).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_98 = op::Conv2D("conv98").set_input_x(conv_97).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_99 = op::Conv2D("conv99").set_input_x(conv_98).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_100 = op::Conv2D("conv100").set_input_x(conv_99).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_101 = op::Conv2D("conv101").set_input_x(conv_100).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_102 = op::Conv2D("conv102").set_input_x(conv_101).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_103 = op::Conv2D("conv103").set_input_x(conv_102).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_104 = op::Conv2D("conv104").set_input_x(conv_103).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_105 = op::Conv2D("conv105").set_input_x(conv_104).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_106 = op::Conv2D("conv106").set_input_x(conv_105).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_107 = op::Conv2D("conv107").set_input_x(conv_106).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_108 = op::Conv2D("conv108").set_input_x(conv_107).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_109 = op::Conv2D("conv109").set_input_x(conv_108).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_110 = op::Conv2D("conv110").set_input_x(conv_109).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_111 = op::Conv2D("conv111").set_input_x(conv_110).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_112 = op::Conv2D("conv112").set_input_x(conv_111).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_113 = op::Conv2D("conv113").set_input_x(conv_112).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_114 = op::Conv2D("conv114").set_input_x(conv_113).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_115 = op::Conv2D("conv115").set_input_x(conv_114).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_116 = op::Conv2D("conv116").set_input_x(conv_115).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_117 = op::Conv2D("conv117").set_input_x(conv_116).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_118 = op::Conv2D("conv118").set_input_x(conv_117).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_119 = op::Conv2D("conv119").set_input_x(conv_118).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_120 = op::Conv2D("conv120").set_input_x(conv_119).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_121 = op::Conv2D("conv121").set_input_x(conv_120).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_122 = op::Conv2D("conv122").set_input_x(conv_121).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_123 = op::Conv2D("conv123").set_input_x(conv_122).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_124 = op::Conv2D("conv124").set_input_x(conv_123).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_125 = op::Conv2D("conv125").set_input_x(conv_124).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_126 = op::Conv2D("conv126").set_input_x(conv_125).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_127 = op::Conv2D("conv127").set_input_x(conv_126).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_128 = op::Conv2D("conv128").set_input_x(conv_127).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_129 = op::Conv2D("conv129").set_input_x(conv_128).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| auto conv_130 = op::Conv2D("conv130").set_input_x(conv_129).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1}); | |||||
| std::vector<Operator> inputs{data}; | |||||
| std::vector<Operator> outputs{conv_130}; | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| } | |||||
| int GetDatTypeSize(DataType dt) { | |||||
| int dailation = 1; | |||||
| if (dt == ge::DT_FLOAT) | |||||
| dailation = 4; | |||||
| else if (dt == ge::DT_FLOAT16) | |||||
| dailation = 2; | |||||
| else if (dt == ge::DT_INT16) | |||||
| dailation = 2; | |||||
| else if (dt == ge::DT_UINT16) | |||||
| dailation = 2; | |||||
| else if (dt == ge::DT_INT32) | |||||
| dailation = 4; | |||||
| else if (dt == ge::DT_UINT32) | |||||
| dailation = 4; | |||||
| else if (dt == ge::DT_INT64) | |||||
| dailation = 8; | |||||
| else if (dt == ge::DT_UINT64) | |||||
| dailation = 8; | |||||
| else if (dt == ge::DT_INT8) | |||||
| dailation = 1; | |||||
| return dailation; | |||||
| } | |||||
| int buildConvGraph_new(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var, int flag, | |||||
| Format format) { | |||||
| auto data_x_shape = op::Data("xShape").set_attr_index(0); | |||||
| auto var = op::Variable(name_var[0]); | |||||
| auto var1 = op::Variable(name_var[1]); //add one seat of ApplyMomentum() | |||||
| auto label1 = op::Variable(name_var[2]); //add one seat of ApplyMomentum() | |||||
| auto conv2dgrad = op::Conv2DBackpropFilterD("output_1"); | |||||
| auto test2 = op::ApplyMomentum(); | |||||
| var.update_output_desc_y(desc_var[0]); | |||||
| var1.update_output_desc_y(desc_var[1]); | |||||
| label1.update_output_desc_y(desc_var[2]); | |||||
| graph.AddOp(var); | |||||
| graph.AddOp(var1); | |||||
| graph.AddOp(label1); | |||||
| auto conv2d = op::Conv2D().set_input_x(data_x_shape).set_input_filter(var).set_attr_strides({1, 1, 1, 1}).set_attr_pads({0,0,0,0}); | |||||
| update_op_format(conv2d, format); | |||||
| ge::TensorDesc tensor_desc_w = conv2d.GetInputDesc("filter"); | |||||
| tensor_desc_w.SetFormat(format); | |||||
| conv2d.UpdateInputDesc("filter", tensor_desc_w); | |||||
| if (flag >= 1) { | |||||
| conv2dgrad.set_input_x(data_x_shape) | |||||
| .set_attr_filter_size(desc_var[0].GetShape().GetDims()) | |||||
| .set_input_out_backprop(conv2d) | |||||
| .set_attr_strides({1, 1, 1, 1}) | |||||
| .set_attr_pads({0, 0, 0, 0}); | |||||
| update_op_format(conv2dgrad, format); | |||||
| graph.AddOp(conv2dgrad); | |||||
| } | |||||
| if (flag >= 2) { | |||||
| // set conv2dgrad var | |||||
| test2.set_input_accum(var1) | |||||
| .set_input_grad(conv2dgrad) | |||||
| .set_input_lr(label1) | |||||
| .set_input_momentum(label1) | |||||
| .set_input_var(var); | |||||
| graph.AddOp(test2); | |||||
| } | |||||
| std::vector<Operator> inputs{data_x_shape}; // set all val | |||||
| std::vector<Operator> outputs{conv2d}; | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| graph.AddOp(conv2d); | |||||
| return 0; | |||||
| } | |||||
| /// load bin data_fail | |||||
| /// input_path: path of bin data_file | |||||
| /// shapes: the shape of Tensor | |||||
| /// ft: the format of Tensor | |||||
| /// dt: the dataType of Tensor | |||||
| Tensor load_variable_input_data(string input_path, std::vector<int64_t> shapes, Format ft, DataType dt) { | |||||
| vector<uint64_t> dim_info1; | |||||
| uint8_t *input_data = (uint8_t *)readTestDataFile(input_path, dim_info1); // common.h | |||||
| TensorDesc input_tensor_desc = TensorDesc(ge::Shape(shapes), ft, dt); | |||||
| input_tensor_desc.SetRealDimCnt(shapes.size()); | |||||
| Tensor input_tensor = Tensor(input_tensor_desc, input_data, GetDatTypeSize(dt) * dim_info1[dim_info1[0] + 1]); | |||||
| return input_tensor; | |||||
| } | |||||
| @@ -1,102 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef ST_RESNET50_GE_COMMON_H_ | |||||
| #define ST_RESNET50_GE_COMMON_H_ | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "utils/tensor_utils.h" | |||||
| #define MY_USER_GE_LOGI(...) GE_LOG_INFO(1, __VA_ARGS__) | |||||
| #define MY_USER_GE_LOGW(...) GE_LOG_WARN(1, __VA_ARGS__) | |||||
| #define MY_USER_GE_LOGE(...) GE_LOG_ERROR(1, 3, __VA_ARGS__) | |||||
| #ifndef USER_GE_LOGI | |||||
| #define USER_GE_LOGI MY_USER_GE_LOGI | |||||
| #endif // USER_GE_LOGI | |||||
| #ifndef USER_GE_LOGW | |||||
| #define USER_GE_LOGW MY_USER_GE_LOGW | |||||
| #endif // USER_GE_LOGW | |||||
| #ifndef USER_GE_LOGE | |||||
| #define USER_GE_LOGE MY_USER_GE_LOGE | |||||
| #endif // USER_GE_LOGE | |||||
| /// train_flag is 0 when infer, train_flag is 1 when train. This param is set for RunGraph_readData() and | |||||
| /// RunGraph_initData() | |||||
| #define TRAIN_FLAG_INFER "infer" | |||||
| #define TRAIN_FLAG_TRAIN "train" | |||||
| #include <string.h> | |||||
| #include <unistd.h> | |||||
| #include <algorithm> | |||||
| #include <chrono> | |||||
| #include <iostream> | |||||
| #include <thread> | |||||
| #include <vector> | |||||
| #include "ge_api.h" | |||||
| #include "graph.h" | |||||
| #include "ptest.h" | |||||
| #include "ops/all_ops.h" | |||||
| using namespace std; | |||||
| using namespace ge; | |||||
| // read bin files and compare results | |||||
| void update_op_format(Operator ops, Format format = ge::FORMAT_NCHW); | |||||
| void getDimInfo(FILE *fp, std::vector<uint64_t> &dim_info); | |||||
| void *readTestDataFile(std::string infile, std::vector<uint64_t> &dim_info); | |||||
| void *readUint8TestDataFile(std::string infile, int size); | |||||
| bool allclose(float *a, float *b, uint64_t count, float rtol, float atol); | |||||
| bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol, float atol); | |||||
| Tensor load_variable_input_data(string input_path, std::vector<int64_t> shapes, Format ft = ge::FORMAT_NCHW, | |||||
| DataType dt = ge::DT_FLOAT); | |||||
| // construct Tensor | |||||
| int GetDatTypeSize(DataType dt); | |||||
| ge::Tensor genTensor(std::vector<int64_t> tensor_shape, Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); | |||||
| ge::Tensor genTensor_withVaule(std::vector<int64_t> tensor_shape, float value = 1); | |||||
| Tensor genTesnor_Shape_as_data(std::vector<int64_t> tensor_shape); | |||||
| // Init GE | |||||
| ge::Status GEInitialize_api(string train_flag = "0", string run_mode_path = "0"); | |||||
| ge::Status GEInitialize_api_new(string train_flag = "infer", string run_mode = "fe"); | |||||
| ge::Status GEFinalize_api(); | |||||
| // construct session and build graph | |||||
| ge::Session *create_aipp_session(); | |||||
| ge::Session *create_session(); | |||||
| ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graphId, Graph &graph, std::vector<Tensor> inputs, | |||||
| std::vector<Tensor> &outputs); | |||||
| // common interface for infer | |||||
| int RunGraph_initData(Graph &graph, string op_name, map<string, std::vector<int64_t>> attr_test, | |||||
| string train_flag = "infer", string run_mode_path = "fe"); | |||||
| void Inputs_load_Data(string op_name, std::vector<Tensor> &input, map<string, std::vector<int64_t>> attr_test, | |||||
| Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT); | |||||
| bool comparaData(std::vector<Tensor> &output, string op_name, map<string, std::vector<int64_t>> attr_test); | |||||
| int RunGraph_readData(Graph &graph, string op_name, map<string, std::vector<int64_t>> attr_test, | |||||
| string train_flag = "infer", string run_mode_path = "fe", Format format = ge::FORMAT_NCHW, | |||||
| DataType dt = ge::DT_FLOAT); | |||||
| // common interface for train | |||||
| int buildCheckPointGraph(Graph &graph, map<string, TensorDesc> variables); | |||||
| int buildInitGraph(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var, | |||||
| std::vector<float> values_var); | |||||
| int buildInitGraph_other_dataType(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var); | |||||
| bool build_multi_input_multi_output_graph(Graph &graph); | |||||
| void build_big_graph(Graph &graph, map<string, std::vector<int64_t>> attr); | |||||
| int buildConvGraph_new(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var, int flag = 2); | |||||
| #endif // ST_RESNET50_GE_COMMON_H_ | |||||
| @@ -1,225 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef ST_RESNET50_PTEST_H_ | |||||
| #define ST_RESNET50_PTEST_H_ | |||||
| #include <stdarg.h> | |||||
| #include <string.h> | |||||
| #include <exception> | |||||
| #include <functional> | |||||
| #include <iostream> | |||||
| #include <list> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
namespace ptest {

/// Exception that a test body can throw to abort itself; TestManager::RunTest catches and ignores it.
class assertion_error : public std::exception {
 public:
  const char *what() const noexcept override { return "Assertion Exception"; }
};

/// Base class of every test case: optional SetUp/TearDown hooks around a bound test function.
class TestFixture {
 public:
  virtual ~TestFixture() {}

  /// Hook executed before the test body; does nothing by default.
  virtual void SetUp() {}
  /// Hook executed after the test body; does nothing by default.
  virtual void TearDown() {}

  /// Invokes the function installed with BindFunction().
  void Run() { _func(); }
  void BindFunction(std::function<void(void)> function) { _func = function; }

  void SetName(const std::string &name) { _name = name; }
  std::string Name() const { return _name; }

 private:
  std::function<void(void)> _func;
  std::string _name;
};

enum TestResult { SUCCESS, FAILED, UNAVAILABLE, UNKNOWN, NOCASEFOUND };

/// Registry of test fixtures and their results, keyed by test name.
class TestManager {
 public:
  static TestManager &GetSingleton() {
    static TestManager instance;
    return instance;
  }

  /// Registers (or replaces) the fixture stored under `name`. The fixture is not owned.
  void RegisterTest(const std::string &name, TestFixture *fixture) { _testfixtures[name] = fixture; }

  /// Name passed to the most recent RunTest() call that found a fixture.
  const std::string GetRunningTestcaseName() const { return _running_testcase_name; }

  /// Names of all registered fixtures, in map (sorted) order.
  const std::list<std::string> GetAllTestNames() const {
    std::list<std::string> names;
    for (const auto &entry : _testfixtures) {
      names.push_back(entry.first);
    }
    return names;
  }

  /// Runs SetUp, the test body and TearDown of the fixture registered under `name`.
  ///
  /// @return NOCASEFOUND when no fixture is registered under `name`; UNAVAILABLE when SetUp marked
  ///         the test FAILED (body and TearDown are then skipped); otherwise the result recorded
  ///         once TearDown has finished (SUCCESS unless something called SetTestResult()).
  TestResult RunTest(const std::string &name) {
    const auto fixture_it = _testfixtures.find(name);
    if (fixture_it == _testfixtures.end() || fixture_it->second == nullptr) {
      return NOCASEFOUND;
    }
    TestFixture *const fixture = fixture_it->second;

    _running_testcase_name = name;
    SetTestResult(name, UNKNOWN);
    fixture->SetUp();
    if (GetTestResult(name) == FAILED) {
      // A failed expectation inside SetUp means the test body cannot be run meaningfully.
      SetTestResult(name, UNAVAILABLE);
      return UNAVAILABLE;
    }

    SetTestResult(name, SUCCESS);
    try {
      fixture->Run();
    } catch (const assertion_error &) {
      // Nothing to do: the failure has already been recorded through SetTestResult().
    }
    fixture->TearDown();
    return GetTestResult(name);
  }

  void SetTestResult(const std::string &name, TestResult result) { _testresults[name] = result; }

  /// Looks up the recorded result without inserting anything into the result table.
  ///
  /// @return the recorded result; UNKNOWN for a registered test that has no result yet;
  ///         NOCASEFOUND for a name that is not registered at all
  TestResult GetTestResult(const std::string &name) const {
    const auto result_it = _testresults.find(name);
    if (result_it != _testresults.end()) {
      return result_it->second;
    }
    return _testfixtures.count(name) != 0 ? UNKNOWN : NOCASEFOUND;
  }

 private:
  std::map<std::string, TestFixture *> _testfixtures;
  std::map<std::string, TestResult> _testresults;
  std::string _running_testcase_name;
};

/// Helper whose constructor binds `function` to `fixture`, names it and registers it with the
/// TestManager singleton; intended to be instantiated as a global object.
class TestFixtureRegister {
 public:
  TestFixtureRegister(const std::string &name, TestFixture *fixture, std::function<void(void)> function) {
    fixture->BindFunction(function);
    fixture->SetName(name);
    TestManager::GetSingleton().RegisterTest(name, fixture);
  }
};

}  // namespace ptest
// Turns its argument into a string literal.
#define _STR(x) #x

// Expands to nothing, so that `_EMPTY_NAMESPACE::Fixture` becomes `::Fixture`.
#define _EMPTY_NAMESPACE

// Core of TEST / TEST_F. For the pair (TESTNAME, CASENAME) it
//   1. forward-declares the test body function g_func_<TESTNAME>_<CASENAME>,
//   2. defines a fixture object of type NAMESPACE::FIXTURECLASS,
//   3. defines a ptest::TestFixtureRegister object that registers the fixture under the
//      name "<TESTNAME>_<CASENAME>" and binds the body function to it,
//   4. opens the definition of the body function; the user's `{ ... }` follows the macro.
#define _TEST(NAMESPACE, FIXTURECLASS, TESTNAME, CASENAME) \
  void g_func_##TESTNAME##_##CASENAME(void); \
  NAMESPACE::FIXTURECLASS g_fixture_##TESTNAME##_##CASENAME; \
  ptest::TestFixtureRegister g_register_##TESTNAME##_##CASENAME( \
      _STR(TESTNAME##_##CASENAME), &g_fixture_##TESTNAME##_##CASENAME, g_func_##TESTNAME##_##CASENAME); \
  void g_func_##TESTNAME##_##CASENAME(void)

// Test case that runs on the default ptest::TestFixture.
#define TEST(TESTNAME, CASENAME) _TEST(ptest, TestFixture, TESTNAME, CASENAME)

// Test case that runs on the user fixture class TESTFIXTURE, looked up as ::TESTFIXTURE.
#define TEST_F(TESTFIXTURE, CASENAME) _TEST(_EMPTY_NAMESPACE, TESTFIXTURE, TESTFIXTURE, CASENAME)
// Non-fatal check: when X evaluates to false, marks the running test case as FAILED and
// prints the failed expression, the test-case name and the source location to std::cerr.
// Execution of the test body continues afterwards.
//
// The expansion is a single `do { ... } while (0)` statement WITHOUT a trailing
// semicolon, so `EXPECT_TRUE(x);` behaves like one ordinary statement and can be used as
// the body of an if/else (same convention as EXPECT_TRUE_C).
#define EXPECT_TRUE(X) \
  do { \
    if (!(X)) { \
      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \
      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \
      std::cerr << #X << " Expectation Failed\n" \
                << "Testcase Name: " << test_name << "\n" \
                << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \
    } \
  } while (0)
// Size limit handed to snprintf by PRINT_ERR (the buffer itself is one byte larger).
#define Max_Log_Len 1024

// Formats a printf-style message and writes it, followed by std::endl, to std::cerr.
// The format string and arguments go straight to snprintf, which truncates output that
// does not fit into Max_Log_Len bytes (terminating NUL included). Being a macro, the
// call site passes the format to snprintf directly.
#define PRINT_ERR(lpszFormat, ...) \
  do { \
    char szTmpBuf[Max_Log_Len + 1] = {0}; \
    snprintf(szTmpBuf, Max_Log_Len, lpszFormat, ##__VA_ARGS__); \
    std::cerr << szTmpBuf << std::endl; \
  } while (0)
// Non-fatal check with an extra diagnostic. When X is false the running test case is
// marked FAILED, the expression / test name / location are printed to std::cerr, and a
// second line "[<ERR_TYPE>]<formatted message>" is printed through PRINT_ERR.
// ERR_TYPE and format must be string literals: they are concatenated at compile time.
#define EXPECT_TRUE_C(X, ERR_TYPE, format, ...) \
  do { \
    if (!(X)) { \
      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \
      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \
      std::cerr << #X << " Expectation Failed." \
                << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \
      PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \
    } \
  } while (0)
// Fatal check: when X evaluates to false, marks the running test case as FAILED, prints
// the failed expression, the test-case name and the source location to std::cerr, and
// throws ptest::assertion_error to abandon the rest of the test body.
//
// The expansion is a single `do { ... } while (0)` statement WITHOUT a trailing
// semicolon, so `ASSERT_TRUE(x);` behaves like one ordinary statement and can be used as
// the body of an if/else.
#define ASSERT_TRUE(X) \
  do { \
    if (!(X)) { \
      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \
      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \
      std::cerr << #X << " Assertion Failed\n" \
                << "Testcase Name: " << test_name << "\n" \
                << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl; \
      throw ptest::assertion_error(); \
    } \
  } while (0)
// Fatal check with an extra diagnostic. When X is false the running test case is marked
// FAILED, the expression / test name / location are printed to std::cerr, a second line
// "[<ERR_TYPE>]<formatted message>" is printed through PRINT_ERR, and
// ptest::assertion_error is thrown to abandon the rest of the test body.
// ERR_TYPE and format must be string literals: they are concatenated at compile time.
//
// The expansion carries no trailing semicolon (matching EXPECT_TRUE_C), so
// `ASSERT_TRUE_C(...);` is exactly one statement and is safe inside if/else.
#define ASSERT_TRUE_C(X, ERR_TYPE, format, ...) \
  do { \
    if (!(X)) { \
      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \
      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED); \
      std::cerr << #X << " Assertion Failed." \
                << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \
      PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__); \
      throw ptest::assertion_error(); \
    } \
  } while (0)
// Error-category tags for the ERR_TYPE argument of the *_C check macros. They must stay
// string-literal macros because the *_C macros splice them into "[" ERR_TYPE "]".
#define CONFIG_ERR     "CONFIG_ERR"
#define LOAD_MODEL_ERR "LOAD_MODEL_ERR"
#define FILE_READ_ERR  "FILE_READ_ERR"
#define RUN_ERROR      "RUN_ERROR"
#define MEM_ERROR      "MEM_ERROR"
#define RESULT_ERR     "RESULT_ERR"
// Non-fatal comparison checks. Each one forwards a fully parenthesised expression to
// EXPECT_TRUE, so the text printed on failure is that parenthesised expression.
#define EXPECT_FALSE(X) EXPECT_TRUE(!(X))
#define EXPECT_EQ(X, Y) EXPECT_TRUE(((X) == (Y)))
#define EXPECT_NE(X, Y) EXPECT_TRUE(((X) != (Y)))
#define EXPECT_GT(X, Y) EXPECT_TRUE(((X) > (Y)))
#define EXPECT_GE(X, Y) EXPECT_TRUE(((X) >= (Y)))
#define EXPECT_LT(X, Y) EXPECT_TRUE(((X) < (Y)))
#define EXPECT_LE(X, Y) EXPECT_TRUE(((X) <= (Y)))

// Same checks with an error category and a printf-style message; they forward to
// EXPECT_TRUE_C.
#define EXPECT_FALSE_C(X, ERR_TYPE, format, ...) EXPECT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__)
#define EXPECT_EQ_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define EXPECT_NE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define EXPECT_GT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define EXPECT_GE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define EXPECT_LT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define EXPECT_LE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
// Fatal comparison checks. Each one forwards a fully parenthesised expression to
// ASSERT_TRUE, so the text printed on failure is that parenthesised expression.
#define ASSERT_FALSE(X) ASSERT_TRUE(!(X))
#define ASSERT_EQ(X, Y) ASSERT_TRUE(((X) == (Y)))
#define ASSERT_NE(X, Y) ASSERT_TRUE(((X) != (Y)))
#define ASSERT_GT(X, Y) ASSERT_TRUE(((X) > (Y)))
#define ASSERT_GE(X, Y) ASSERT_TRUE(((X) >= (Y)))
#define ASSERT_LT(X, Y) ASSERT_TRUE(((X) < (Y)))
#define ASSERT_LE(X, Y) ASSERT_TRUE(((X) <= (Y)))

// Same checks with an error category and a printf-style message; they forward to
// ASSERT_TRUE_C.
#define ASSERT_FALSE_C(X, ERR_TYPE, format, ...) ASSERT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__)
#define ASSERT_EQ_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define ASSERT_NE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define ASSERT_GT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define ASSERT_GE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define ASSERT_LT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__)
#define ASSERT_LE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
| #endif // ST_RESNET50_PTEST_H_ | |||||
| @@ -1,852 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <assert.h> | |||||
| #include <sys/stat.h> | |||||
| #include <sys/types.h> | |||||
| #include <algorithm> | |||||
| #include <chrono> | |||||
| #include <ctime> | |||||
| #include <sstream> | |||||
| #include "common.h" | |||||
| #include "ge_api.h" | |||||
| #include "graph.h" | |||||
| #include "ops/all_ops.h" | |||||
| #include "types.h" | |||||
| #include "utils/tensor_utils.h" | |||||
| using namespace std; | |||||
| using namespace ge; | |||||
| using namespace op; | |||||
| typedef bool (*Func)(Graph &graph); | |||||
// Numeric mode constants. NOTE(review): neither is referenced by the macros in this part
// of the file; their meaning should be confirmed against the operator definitions.
#define PADDING_MODE 6
#define GRAD_PADDING_MODE 3

// Pads attribute values handed to the convolution macros (`pad` parameter).
vector<int64_t> pad_1 = {1, 1, 1, 1};
vector<int64_t> pad_0 = {0, 0, 0, 0};

// Two-element strides; the convolution macros read stride[0] and stride[1].
vector<int64_t> stride_1 = {1, 1};
vector<int64_t> stride_2 = {2, 2};
// Declares, in the enclosing scope, everything needed for one Conv2D node named
// "<LAYER><BLK><OPNUM>":
//   <prefix>_input       reference to `input`
//   <prefix>_desc        NCHW float TensorDesc of shape {out_channels, in_channels, h, w}
//   <prefix>_weight      Variable "<name>_weight" carrying that desc as output
//   <prefix>_mom_weight  Variable "<name>_mom_weight" carrying that desc as input and output
//   <prefix>             the Conv2D operator fed by output "y" of `input`
// where <prefix> is LAYER_BLK_OPNUM.
//
// Parameters: in_channels, out_channels, h, w - filter dimensions; stride - indexable,
// elements [0] and [1] are used; pad - value for the pads attribute; input - upstream
// operator (must be an lvalue, it is bound to a reference).
//
// The shapes are also logged to cout. Weight dimensions are printed in the order of the
// actual filter shape (out_channels, in_channels, h, w) and all dimensions are separated
// by spaces so multi-digit values stay readable.
#define GENERATE_CONV_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
  \
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \
  auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \
  LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_weight = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \
  LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  LAYER##_##BLK##_##OPNUM##_mom_weight.update_input_desc_x(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) << "'s weight shape is:" << out_channels << " " \
       << in_channels << " " << h << " " << w << endl; \
  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) \
       << "'s input_x op's shape is:" << input.GetOutputDesc("y").GetShape().GetDim(2) << endl; \
  auto LAYER##_##BLK##_##OPNUM##_tmp_dims = input.GetOutputDesc("y").GetShape().GetDims(); \
  for (const auto &LAYER##_##BLK##_##OPNUM##_tmp_dim : LAYER##_##BLK##_##OPNUM##_tmp_dims) { \
    cout << LAYER##_##BLK##_##OPNUM##_tmp_dim << " "; \
  } \
  cout << endl; \
  \
  auto LAYER##_##BLK##_##OPNUM = op::Conv2D(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
                                     .set_input_x(input, "y") \
                                     .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \
                                     .set_attr_strides({1, 1, stride[0], stride[1]}) \
                                     .set_attr_pads(pad) \
                                     .set_attr_data_format("NCHW"); \
  update_op_format(LAYER##_##BLK##_##OPNUM);
// Declares, in the enclosing scope, a Constant operator filled with 0.01 for the
// variable <prefix>_<CONSTNAME>, where <prefix> is LAYER_BLK_OPNUM:
//   <prefix>_<CONSTNAME>_tensor    Tensor holding <prefix>_size floats, described by <prefix>_desc
//   <prefix>_<CONSTNAME>_constant  op::Constant whose value attribute is that tensor
// Requires <prefix>_desc and <prefix>_size to be declared before the macro is used.
//
// The staging buffer <prefix>_<CONSTNAME>_data is a vector, so it is released even when
// one of the calls below throws; on the normal path its storage is handed back at the
// end of the macro, at the point where the buffer used to be deleted.
#define GENERATE_CONSTANT(LAYER, BLK, OPNUM, CONSTNAME) \
  Tensor LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor; \
  vector<float> LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data(LAYER##_##BLK##_##OPNUM##_size, \
                                                             static_cast<float>(0.01)); \
  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetData( \
      reinterpret_cast<uint8_t *>(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data.data()), \
      LAYER##_##BLK##_##OPNUM##_size * sizeof(float)); \
  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetTensorDesc(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant = \
      op::Constant().set_attr_value(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor); \
  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  vector<float>().swap(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data);
// Variable-initialisation counterpart of GENERATE_CONV_VAR. Declares, in the enclosing
// scope (<prefix> is LAYER_BLK_OPNUM):
//   <prefix>_desc / <prefix>_size   filter desc {out_channels, in_channels, h, w} and its element count
//   <prefix>_weight, <prefix>_mom_weight               the two Variables
//   <prefix>_weight_constant, <prefix>_mom_weight_constant   0.01-filled Constants (via GENERATE_CONSTANT)
//   <prefix>_weight_assign, <prefix>_mom_weight_assign       Assign ops wiring Constant -> Variable
// and appends both Variables to `input`, which must therefore be a container with
// push_back(). The stride and pad parameters are accepted but not used.
#define GENERATE_CONV_VAR_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input) \
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT); \
  uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \
  auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \
  LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_weight = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight"); \
  LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, weight); \
  auto LAYER##_##BLK##_##OPNUM##_weight_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_weight) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_weight_constant); \
  \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_weight); \
  auto LAYER##_##BLK##_##OPNUM##_mom_weight_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_weight) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_weight_constant); \
  \
  input.push_back(LAYER##_##BLK##_##OPNUM##_weight); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_weight);
// Declares, in the enclosing scope, a FusedBatchNorm node named "<LAYER><BLK><OPNUM>"
// together with its parameter Variables (<prefix> is LAYER_BLK_OPNUM):
//   <prefix>_input   reference to `input` (must be an lvalue operator)
//   <prefix>_desc    NCHW float TensorDesc of shape {1, out_channels, 1, 1}
//   <prefix>_scale, _mom_scale, _b, _mom_b, _mean, _variance   Variables using that desc
//   <prefix>         the FusedBatchNorm operator fed by output "y" of `input`
// The operator is created with mode 1, epsilon 1e-5 and is_training = true; the two
// _mom_* Variables are declared here but not connected to it.
#define GENERATE_BN_VAR(LAYER, BLK, OPNUM, out_channels, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
  \
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \
  auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \
  LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_scale = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \
  LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \
  LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \
  LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \
  LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  auto LAYER##_##BLK##_##OPNUM##_variance = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \
  LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM = op::FusedBatchNorm(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
                                     .set_input_x(input, "y") \
                                     .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \
                                     .set_input_b(LAYER##_##BLK##_##OPNUM##_b) \
                                     .set_input_mean(LAYER##_##BLK##_##OPNUM##_mean) \
                                     .set_input_variance(LAYER##_##BLK##_##OPNUM##_variance) \
                                     .set_attr_mode(1) \
                                     .set_attr_epsilon(1e-5) \
                                     .set_attr_is_training(true);
// Variable-initialisation counterpart of GENERATE_BN_VAR. Declares, in the enclosing
// scope (<prefix> is LAYER_BLK_OPNUM):
//   <prefix>_desc / <prefix>_size   desc {1, out_channels, 1, 1} and its element count
//   six Variables: _scale, _mom_scale, _b, _mom_b, _mean, _variance
//   for each of them a 0.01-filled Constant (via GENERATE_CONSTANT) and an Assign op
//   named <prefix>_<var>_assign that wires the Constant into the Variable
// and finally appends the six Variables to `input` (a container with push_back()) in the
// order scale, mom_scale, b, mom_b, mean, variance.
#define GENERATE_BN_VAR_VAR(LAYER, BLK, OPNUM, out_channels, input) \
  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT); \
  uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize(); \
  auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \
  LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_scale = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale"); \
  LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b"); \
  LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \
  LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean"); \
  LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  auto LAYER##_##BLK##_##OPNUM##_variance = \
      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance"); \
  LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc); \
  \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, scale); \
  \
  auto LAYER##_##BLK##_##OPNUM##_scale_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_scale) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_scale_constant); \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_scale); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_scale_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_scale) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_scale_constant); \
  \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, b); \
  \
  auto LAYER##_##BLK##_##OPNUM##_b_assign = \
      op::Assign().set_input_ref(LAYER##_##BLK##_##OPNUM##_b).set_input_value(LAYER##_##BLK##_##OPNUM##_b_constant); \
  \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_b); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mom_b_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_b) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_b_constant); \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mean); \
  \
  auto LAYER##_##BLK##_##OPNUM##_mean_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mean) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_mean_constant); \
  \
  GENERATE_CONSTANT(LAYER, BLK, OPNUM, variance); \
  \
  auto LAYER##_##BLK##_##OPNUM##_variance_assign = \
      op::Assign() \
          .set_input_ref(LAYER##_##BLK##_##OPNUM##_variance) \
          .set_input_value(LAYER##_##BLK##_##OPNUM##_variance_constant); \
  \
  input.push_back(LAYER##_##BLK##_##OPNUM##_scale); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_scale); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_b); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_b); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_mean); \
  input.push_back(LAYER##_##BLK##_##OPNUM##_variance);
// (Operator& input)
// Declares <prefix>_input (a reference to `input`) and <prefix>, a Relu operator named
// "<LAYER><BLK><OPNUM>" that consumes output "y" of `input`; <prefix> is LAYER_BLK_OPNUM.
#define GENERATE_RELU_VAR(LAYER, BLK, OPNUM, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
  auto LAYER##_##BLK##_##OPNUM = op::Relu(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x(input, "y");
// (Operator& input)
// Declares <prefix>_input (a reference to `input`) and <prefix>, a MaxPoolWithArgmax
// operator named "<LAYER><BLK><OPNUM>" over output "y" of `input`, with ksize
// {1, 3, 3, 1}, strides {1, 2, 2, 1} and "SAME" padding; <prefix> is LAYER_BLK_OPNUM.
// NOTE(review): GENERATE_MAXPOOL_GRAD uses {1, 1, 3, 3} / {1, 1, 2, 2} for the same
// attributes - confirm which axis layout is intended.
#define GENERATE_MAXPOOL_VAR(LAYER, BLK, OPNUM, input) \
  auto &LAYER##_##BLK##_##OPNUM##_input = input; \
  \
  auto LAYER##_##BLK##_##OPNUM = op::MaxPoolWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
                                     .set_input_x(input, "y") \
                                     .set_attr_ksize({1, 3, 3, 1}) \
                                     .set_attr_padding("SAME") \
                                     .set_attr_strides({1, 2, 2, 1});
// (Operator& input_x1, Operator& input_x2)
// Declares LAYER_BLK_OPNUM, an Add operator named "<LAYER><BLK><OPNUM>" that sums
// output "y" of input_x1 and output "y" of input_x2.
#define GENERATE_ADD_VAR(LAYER, BLK, OPNUM, input_x1, input_x2) \
  auto LAYER##_##BLK##_##OPNUM = op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
                                     .set_input_x1(input_x1, "y") \
                                     .set_input_x2(input_x2, "y");
// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
// Builds one bottleneck block whose shortcut branch has its own convolution:
//   main branch:     conv1 (1x1, `stride`) -> bn1 -> relu1 -> conv2 (3x3, stride_1, pad_1)
//                    -> bn2 -> relu2 -> conv3 (1x1, stride_1) -> bn3
//   shortcut branch: conv4 (1x1, `stride`) -> bn4, both applied to `input`
//   output:          relu5(add5(bn3, bn4))
// The inner convolutions use out_channels / 4 channels. Declares LAYER_BLK_output (the
// final Relu) and LAYER_BLK_output_label ("y") for the caller, plus LAYER_BLK_input and
// LAYER_BLK_stride as references to the arguments.
#define MAKE_RESIDUAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \
  auto &LAYER##_##BLK##_input = input; \
  auto &LAYER##_##BLK##_stride = stride; \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
  \
  GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \
  GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \
  \
  GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                    LAYER##_##BLK##_relu1); \
  GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \
  GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \
  \
  GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \
                    LAYER##_##BLK##_relu2); \
  GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \
  \
  GENERATE_CONV_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR(LAYER, BLK, bn4, out_channels, LAYER##_##BLK##_conv4); \
  \
  GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, LAYER##_##BLK##_bn4); \
  GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \
  \
  auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \
  auto &LAYER##_##BLK##_output_label = "y";
// Variable-initialisation counterpart of MAKE_RESIDUAL_BLOCK: for conv1..conv4 and
// bn1..bn4 it expands GENERATE_CONV_VAR_VAR / GENERATE_BN_VAR_VAR with the same channel
// and kernel sizes, so every Variable of the block is appended to `input` (a container
// with push_back()).
#define MAKE_RESIDUAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \
  \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                        input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \
  \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input); \
  \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn4, out_channels, input);
// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
// Builds one bottleneck block with an identity shortcut:
//   conv1 (1x1, `stride`) -> bn1 -> relu1 -> conv2 (3x3, stride_1, pad_1) -> bn2 -> relu2
//   -> conv3 (1x1, stride_1) -> bn3, then relu5(add5(bn3, input))
// The inner convolutions use out_channels / 4 channels. Declares LAYER_BLK_output (the
// final Relu) and LAYER_BLK_output_label ("y") for the caller, plus LAYER_BLK_input and
// LAYER_BLK_stride as references to the arguments.
#define MAKE_NORMAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input) \
  auto &LAYER##_##BLK##_input = input; \
  auto &LAYER##_##BLK##_stride = stride; \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
  \
  GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1); \
  GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1); \
  \
  GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                    LAYER##_##BLK##_relu1); \
  GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2); \
  GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2); \
  \
  GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, \
                    LAYER##_##BLK##_relu2); \
  GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3); \
  \
  GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, input); \
  GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5); \
  \
  auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5; \
  auto &LAYER##_##BLK##_output_label = "y";
// Variable-initialisation counterpart of MAKE_NORMAL_BLOCK: expands
// GENERATE_CONV_VAR_VAR / GENERATE_BN_VAR_VAR for conv1..conv3 and bn1..bn3, so every
// Variable of the block is appended to `input` (a container with push_back()).
#define MAKE_NORMAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input) \
  int LAYER##_##BLK##_out_chls = out_channels / 4; \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input); \
  \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
                        input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input); \
  \
  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input); \
  GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input);
// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
// A layer made of a single residual block called blk1; LAYER_output and
// LAYER_output_label alias that block's output and output label.
#define MAKE_RESIDUAL_LAYER(LAYER, in_channels, out_channels, stride, input) \
  MAKE_RESIDUAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \
  \
  auto &LAYER##_output = LAYER##_blk1_output; \
  auto &LAYER##_output_label = LAYER##_blk1_output_label;
// Variable-initialisation counterpart of MAKE_RESIDUAL_LAYER (single block blk1).
#define MAKE_RESIDUAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \
  MAKE_RESIDUAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input);
// (int in_channels, int out_channels, vector<int64_t> stride{1,1}, Operator& input)
// A layer made of a single identity-shortcut block called blk1; LAYER_output and
// LAYER_output_label alias that block's output and output label.
#define MAKE_NORMAL_LAYER(LAYER, in_channels, out_channels, stride, input) \
  MAKE_NORMAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \
  \
  auto &LAYER##_output = LAYER##_blk1_output; \
  auto &LAYER##_output_label = LAYER##_blk1_output_label;
// Variable-initialisation counterpart of MAKE_NORMAL_LAYER (single block blk1).
#define MAKE_NORMAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \
  MAKE_NORMAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input);
// Chains sixteen layers (layer1 .. layer16) starting from `input`; each layer consumes
// the previous layer's <layer>_output. Layers 1, 4, 8 and 14 are residual layers that
// change the channel count (64->256, 256->512, 512->1024, 1024->2048); layers 4, 8 and
// 14 use stride_2, all others stride_1. Declares resnet50_output and
// resnet50_output_label as aliases of layer16's output and label.
#define MAKE_RESNET50(input) \
  MAKE_RESIDUAL_LAYER(layer1, 64, 256, stride_1, input) \
  MAKE_NORMAL_LAYER(layer2, 256, 256, stride_1, layer1_output) \
  MAKE_NORMAL_LAYER(layer3, 256, 256, stride_1, layer2_output) \
  MAKE_RESIDUAL_LAYER(layer4, 256, 512, stride_2, layer3_output) \
  MAKE_NORMAL_LAYER(layer5, 512, 512, stride_1, layer4_output) \
  MAKE_NORMAL_LAYER(layer6, 512, 512, stride_1, layer5_output) \
  MAKE_NORMAL_LAYER(layer7, 512, 512, stride_1, layer6_output) \
  MAKE_RESIDUAL_LAYER(layer8, 512, 1024, stride_2, layer7_output) \
  MAKE_NORMAL_LAYER(layer9, 1024, 1024, stride_1, layer8_output) \
  MAKE_NORMAL_LAYER(layer10, 1024, 1024, stride_1, layer9_output) \
  MAKE_NORMAL_LAYER(layer11, 1024, 1024, stride_1, layer10_output) \
  MAKE_NORMAL_LAYER(layer12, 1024, 1024, stride_1, layer11_output) \
  MAKE_NORMAL_LAYER(layer13, 1024, 1024, stride_1, layer12_output) \
  MAKE_RESIDUAL_LAYER(layer14, 1024, 2048, stride_2, layer13_output) \
  MAKE_NORMAL_LAYER(layer15, 2048, 2048, stride_1, layer14_output) \
  MAKE_NORMAL_LAYER(layer16, 2048, 2048, stride_1, layer15_output) \
  \
  auto &resnet50_output = layer16_output; \
  auto &resnet50_output_label = layer16_output_label;
// Variable-initialisation counterpart of MAKE_RESNET50: expands the *_LAYER_VAR macro of
// each of the sixteen layers with the same channel counts and strides, so that every
// Variable of the network is appended to `inputs` (a container with push_back()).
// The last line deliberately carries no line continuation, so the macro ends here and
// cannot absorb whatever line follows it.
#define MAKE_RESNET50_VAR(inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer1, 64, 256, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer2, 256, 256, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer3, 256, 256, stride_1, inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer4, 256, 512, stride_2, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer5, 512, 512, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer6, 512, 512, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer7, 512, 512, stride_1, inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer8, 512, 1024, stride_2, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer9, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer10, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer11, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer12, 1024, 1024, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer13, 1024, 1024, stride_1, inputs) \
  MAKE_RESIDUAL_LAYER_VAR(layer14, 1024, 2048, stride_2, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer15, 2048, 2048, stride_1, inputs) \
  MAKE_NORMAL_LAYER_VAR(layer16, 2048, 2048, stride_1, inputs)
//---------------------------------------------------------------------------------------------
// (Operator& input)
// Declares LAYER_BLK_OPNUM_grad, a BiasAddGrad operator named "<LAYER><BLK><OPNUM>grad"
// whose x input is the output of `input` named by input.name_out_dx().
#define GENERATE_BIASADD_GRAD(LAYER, BLK, OPNUM, input) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::BiasAddGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
          .set_input_x(input, input.name_out_dx());
// (Operator& input)
// Declares LAYER_BLK_OPNUM_grad, a MatMul operator named "<LAYER><BLK><OPNUM>grad" with
// `input` connected as x1. No x2 input is set here.
#define GENERATE_MATMUL_GRAD(LAYER, BLK, OPNUM, input) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::MatMul(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_x1(input);
// (Operator& input)
// Declares LAYER_BLK_OPNUM_grad, a Reshape operator named "<LAYER><BLK><OPNUM>grad" with
// `input` connected as its tensor input.
#define GENERATE_RESHAPE_GRAD(LAYER, BLK, OPNUM, input) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::Reshape(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_tensor(input);
// (Operator& input_grad, Operator& input_maxpool)
// Declares LAYER_BLK_OPNUM_grad, a MaxPoolGradWithArgmax operator named
// "<LAYER><BLK><OPNUM>grad". Its x input is output "y" of LAYER_BLK_OPNUM_input (which
// must already be declared in scope, e.g. by GENERATE_MAXPOOL_VAR), its grad input is
// input_grad and its argmax input is the argmax output of input_maxpool.
// Attributes: ksize {1, 1, 3, 3}, strides {1, 1, 2, 2}, padding "SAME".
// NOTE(review): GENERATE_MAXPOOL_VAR uses {1, 3, 3, 1} / {1, 2, 2, 1} for the forward
// op - confirm which axis layout is intended.
#define GENERATE_MAXPOOL_GRAD(LAYER, BLK, OPNUM, input_grad, input_maxpool) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::MaxPoolGradWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \
          .set_input_grad(input_grad) \
          .set_input_argmax(input_maxpool, input_maxpool.name_out_argmax()) \
          .set_attr_ksize({1, 1, 3, 3}) \
          .set_attr_strides({1, 1, 2, 2}) \
          .set_attr_padding("SAME");
// (Operator& input_dy, output label dy_label)
// Declares LAYER_BLK_OPNUM_grad, a ReluGrad operator named "<LAYER><BLK><OPNUM>grad".
// Gradients come from output dy_label of input_dy; features come from output "y" of the
// forward operator LAYER_BLK_OPNUM, which must already be declared in scope.
#define GENERATE_RELU_GRAD(LAYER, BLK, OPNUM, input_dy, dy_label) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::ReluGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
          .set_input_gradients(input_dy, dy_label) \
          .set_input_features(LAYER##_##BLK##_##OPNUM, "y");
// (Operator& input_dy)
// Backward pass for a batch-norm node created by GENERATE_BN_VAR; <prefix> is
// LAYER_BLK_OPNUM and the forward declarations <prefix>, <prefix>_input, <prefix>_scale,
// <prefix>_mom_scale, <prefix>_b and <prefix>_mom_b must be in scope. Declares:
//   <prefix>_grad            FusedBatchNormGrad fed by output "backprops" of input_dy,
//                            output "y" of <prefix>_input and the forward op's
//                            "save_mean" / "save_inv_variance" outputs (epsilon 0.0001)
//   <prefix>_momentum_scale  ApplyMomentum updating <prefix>_scale from the bn_scale gradient
//   <prefix>_momentum_b      ApplyMomentum updating <prefix>_b from the bn_bias gradient
// Both ApplyMomentum ops take their lr and momentum inputs from an operator called
// `label1`, which is not a macro parameter and must exist in the enclosing scope.
// NOTE(review): the forward op is built with epsilon 1e-5 - confirm the mismatch is intended.
#define GENERATE_BN_GRAD(LAYER, BLK, OPNUM, input_dy) \
  auto LAYER##_##BLK##_##OPNUM##_grad = \
      op::FusedBatchNormGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
          .set_input_dy(input_dy, "backprops") \
          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \
          .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale) \
          .set_input_save_mean(LAYER##_##BLK##_##OPNUM, "save_mean") \
          .set_input_save_inv_variance(LAYER##_##BLK##_##OPNUM, "save_inv_variance") \
          .set_attr_epsilon(0.0001); \
  \
  auto LAYER##_##BLK##_##OPNUM##_momentum_scale = \
      op::ApplyMomentum() \
          .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_scale) \
          .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_scale()) \
          .set_input_lr(label1) \
          .set_input_momentum(label1) \
          .set_input_var(LAYER##_##BLK##_##OPNUM##_scale); \
  \
  auto LAYER##_##BLK##_##OPNUM##_momentum_b = \
      op::ApplyMomentum() \
          .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_b) \
          .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_bias()) \
          .set_input_lr(label1) \
          .set_input_momentum(label1) \
          .set_input_var(LAYER##_##BLK##_##OPNUM##_b);
// GENERATE_CONV_PROP_FILTER declares, with the prefix P = `<LAYER>_<BLK>_<OPNUM>`:
//   P_propfilter      - op::Conv2DBackpropFilterD; x is output "y" of P_input, filter_size is the
//                       dims of P_desc, out_backprop is the dx output of `input_bngrad`, strides
//                       is `stride`, pads are fixed at {1, 1, 1, 1}.
//   P_momentum_weight - op::ApplyMomentum on P_weight (accum P_mom_weight, grad = P_propfilter),
//                       with `label1` used for both lr and momentum.
// update_op_format() is called on P_propfilter before the momentum op is created.
// P_input, P_desc, P_weight, P_mom_weight and `label1` must be in scope at the expansion site.
| // (Operator& input_bngrad, stride) | |||||
| #define GENERATE_CONV_PROP_FILTER(LAYER, BLK, OPNUM, input_bngrad, stride) \ | |||||
| auto LAYER##_##BLK##_##OPNUM##_propfilter = \ | |||||
| op::Conv2DBackpropFilterD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propfilter")) \ | |||||
| .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y") \ | |||||
| .set_attr_filter_size(LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetDims()) \ | |||||
| .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ | |||||
| .set_attr_strides(stride) \ | |||||
| .set_attr_pads({1, 1, 1, 1}); \ | |||||
| \ | |||||
| update_op_format(LAYER##_##BLK##_##OPNUM##_propfilter); \ | |||||
| auto LAYER##_##BLK##_##OPNUM##_momentum_weight = op::ApplyMomentum() \ | |||||
| .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_weight) \ | |||||
| .set_input_grad(LAYER##_##BLK##_##OPNUM##_propfilter) \ | |||||
| .set_input_lr(label1) \ | |||||
| .set_input_momentum(label1) \ | |||||
| .set_input_var(LAYER##_##BLK##_##OPNUM##_weight); | |||||
// GENERATE_CONV_PROP_INPUT declares, with the prefix P = `<LAYER>_<BLK>_<OPNUM>`:
//   P_propinput       - op::Conv2DBackpropInputD; input_size is the shape of output "y" of P_input,
//                       filter is P_weight, out_backprop is the dx output of `input_bngrad`,
//                       strides is `stride`, pads are fixed at {1, 1, 1, 1}.
//   P_propinput_label - reference to the literal "y".
// It also writes two lines to cout: dim 3 and then dim 2 of the "dx" output shape of
// `input_bngrad`, each multiplied by the matching `stride` entry (both lines carry the same text).
// The macro body ends WITHOUT a semicolon, so the caller must supply one.
// The three `///` lines below are a disabled alternative way of computing input_size.
| ///.set_attr_input_size({input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(0),LAYER##_##BLK##_##OPNUM##_weight.GetOutputDesc().GetShape().GetDim(1), | |||||
| ///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(2)*stride[2], | |||||
| ///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(3)*stride[3]}) | |||||
| #define GENERATE_CONV_PROP_INPUT(LAYER, BLK, OPNUM, input_bngrad, stride) \ | |||||
| auto LAYER##_##BLK##_##OPNUM##_propinput = \ | |||||
| op::Conv2DBackpropInputD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propinput")) \ | |||||
| .set_attr_input_size(LAYER##_##BLK##_##OPNUM##_input.GetOutputDesc("y").GetShape().GetDims()) \ | |||||
| .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight) \ | |||||
| .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx()) \ | |||||
| .set_attr_strides(stride) \ | |||||
| .set_attr_pads({1, 1, 1, 1}); \ | |||||
| cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ | |||||
| << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(3) * stride[3] << endl; \ | |||||
| cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput" \ | |||||
| << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(2) * stride[2] << endl; \ | |||||
| \ | |||||
| update_op_format(LAYER##_##BLK##_##OPNUM##_propinput); \ | |||||
| auto &LAYER##_##BLK##_##OPNUM##_propinput_label = "y" | |||||
// GENERATE_ADD_GRAD declares a local `<LAYER>_<BLK>_<OPNUM>_grad` holding an op::Add whose x1 is
// output `input_x1_label` of `input_x1` and whose x2 is output `input_x2_label` of `input_x2`.
// The op name is the stringified LAYER, BLK and OPNUM joined without separators, plus "grad".
| // (Operator& input_x1, input_x1_label, Operator& input_x2, input_x2_label) | |||||
| #define GENERATE_ADD_GRAD(LAYER, BLK, OPNUM, input_x1, input_x1_label, input_x2, input_x2_label) \ | |||||
| auto LAYER##_##BLK##_##OPNUM##_grad = op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \ | |||||
| .set_input_x1(input_x1, input_x1_label) \ | |||||
| .set_input_x2(input_x2, input_x2_label); | |||||
// MAKE_RESIDUAL_BLOCK_GRAD emits the gradient ops for one block that has a fourth conv/bn pair
// (bn4/conv4) next to the main path. Expansion order:
//   relu5_grad                      <- (input_dy, dy_label)
//   bn4_grad -> conv4 filter/input  <- relu5_grad, using <LAYER>_<BLK>_stride
//   bn3_grad -> conv3 filter/input  <- relu5_grad, using stride_1
//   relu2_grad -> bn2 -> conv2      <- conv3_propinput, using stride_1
//   relu1_grad -> bn1 -> conv1      <- conv2_propinput, using <LAYER>_<BLK>_stride
//   add5_grad = conv1_propinput + conv4_propinput
// It then declares `<LAYER>_<BLK>_grad_output` (reference to add5_grad) and
// `<LAYER>_<BLK>_grad_output_label` (reference to the literal "y").
// `stride_1`, `<LAYER>_<BLK>_stride` and every forward-pass local named by the GENERATE_* macros
// must be in scope. The body ends WITHOUT a semicolon, so the caller must supply one.
| // (Operator& input_dy, dy_label) | |||||
| #define MAKE_RESIDUAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ | |||||
| GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \ | |||||
| \ | |||||
| GENERATE_BN_GRAD(LAYER, BLK, bn4, LAYER##_##BLK##_relu5_grad); \ | |||||
| GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ | |||||
| GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride); \ | |||||
| \ | |||||
| GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ | |||||
| GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ | |||||
| GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ | |||||
| \ | |||||
| GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ | |||||
| GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ | |||||
| GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ | |||||
| GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ | |||||
| \ | |||||
| GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ | |||||
| GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ | |||||
| GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ | |||||
| GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ | |||||
| \ | |||||
| GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ | |||||
| LAYER##_##BLK##_conv4_propinput, LAYER##_##BLK##_conv4_propinput_label); \ | |||||
| \ | |||||
| auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ | |||||
| auto &LAYER##_##BLK##_grad_output_label = "y" | |||||
// MAKE_NORMAL_BLOCK_GRAD emits the gradient ops for one block without the bn4/conv4 pair.
// Expansion order:
//   relu5_grad                      <- (input_dy, dy_label)
//   bn3_grad -> conv3 filter/input  <- relu5_grad, using stride_1
//   relu2_grad -> bn2 -> conv2      <- conv3_propinput, using stride_1
//   relu1_grad -> bn1 -> conv1      <- conv2_propinput, using <LAYER>_<BLK>_stride
//   add5_grad = conv1_propinput + (input_dy, dy_label)   // incoming gradient is added back directly
// It then declares `<LAYER>_<BLK>_grad_output` (reference to add5_grad) and
// `<LAYER>_<BLK>_grad_output_label` (reference to the literal "y").
// The body ends WITHOUT a semicolon, so the caller must supply one.
| // (Operator& input_dy, dy_label) | |||||
| #define MAKE_NORMAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label) \ | |||||
| GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label); \ | |||||
| \ | |||||
| GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad); \ | |||||
| GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ | |||||
| GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1); \ | |||||
| \ | |||||
| GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y"); \ | |||||
| GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad); \ | |||||
| GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ | |||||
| GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1); \ | |||||
| \ | |||||
| GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y"); \ | |||||
| GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad); \ | |||||
| GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ | |||||
| GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride); \ | |||||
| \ | |||||
| GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \ | |||||
| input_dy, dy_label); \ | |||||
| \ | |||||
| auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad; \ | |||||
| auto &LAYER##_##BLK##_grad_output_label = "y" | |||||
// MAKE_RESIDUAL_LAYER_GRAD expands MAKE_RESIDUAL_BLOCK_GRAD for the single block `blk1` of LAYER
// and aliases its results as `<LAYER>_grad_output` / `<LAYER>_grad_output_label`.
// Unlike the block macros, this body ends with a semicolon.
| // (Operator& input_dy, dy_label) | |||||
| #define MAKE_RESIDUAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ | |||||
| MAKE_RESIDUAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ | |||||
| \ | |||||
| auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ | |||||
| auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; | |||||
// MAKE_NORMAL_LAYER_GRAD expands MAKE_NORMAL_BLOCK_GRAD for the single block `blk1` of LAYER
// and aliases its results as `<LAYER>_grad_output` / `<LAYER>_grad_output_label`.
// Unlike the block macros, this body ends with a semicolon.
| // (Operator& input_dy, dy_label) | |||||
| #define MAKE_NORMAL_LAYER_GRAD(LAYER, input_dy, dy_label) \ | |||||
| MAKE_NORMAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \ | |||||
| \ | |||||
| auto &LAYER##_grad_output = LAYER##_blk1_grad_output; \ | |||||
| auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label; | |||||
// MAKE_RESNET50_GRAD chains the per-layer gradient macros from layer16 down to layer1.
// Each layer consumes the `<layer>_grad_output` / `<layer>_grad_output_label` pair of the layer
// expanded just before it; layer16 consumes (input_dy, dy_label).
// Layers 14, 8, 4 and 1 use MAKE_RESIDUAL_LAYER_GRAD, all others MAKE_NORMAL_LAYER_GRAD.
// The final result is aliased as `resnet50_grad_output` / `resnet50_grad_output_label`.
// No semicolons are needed between the layer lines because each layer macro already ends in one.
| #define MAKE_RESNET50_GRAD(input_dy, dy_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer16, input_dy, dy_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer15, layer16_grad_output, layer16_grad_output_label) \ | |||||
| MAKE_RESIDUAL_LAYER_GRAD(layer14, layer15_grad_output, layer15_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer13, layer14_grad_output, layer14_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer12, layer13_grad_output, layer13_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer11, layer12_grad_output, layer12_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer10, layer11_grad_output, layer11_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer9, layer10_grad_output, layer10_grad_output_label) \ | |||||
| MAKE_RESIDUAL_LAYER_GRAD(layer8, layer9_grad_output, layer9_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer7, layer8_grad_output, layer8_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer6, layer7_grad_output, layer7_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer5, layer6_grad_output, layer6_grad_output_label) \ | |||||
| MAKE_RESIDUAL_LAYER_GRAD(layer4, layer5_grad_output, layer5_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer3, layer4_grad_output, layer4_grad_output_label) \ | |||||
| MAKE_NORMAL_LAYER_GRAD(layer2, layer3_grad_output, layer3_grad_output_label) \ | |||||
| MAKE_RESIDUAL_LAYER_GRAD(layer1, layer2_grad_output, layer2_grad_output_label) \ | |||||
| \ | |||||
| auto &resnet50_grad_output = layer1_grad_output; \ | |||||
| auto &resnet50_grad_output_label = layer1_grad_output_label; | |||||
// Builds the ResNet-50 test graph into `graph`.
// The visible code creates the stem (Data -> Conv2D -> FusedBatchNorm -> Relu -> MaxPoolWithArgmax)
// plus a set of Variable ops, then hands the max-pool op to MAKE_RESNET50, which is defined
// outside this part of the file. Only `data` is registered as a graph input and the output list
// is left empty. Always returns true.
// NOTE(review): several locals (data1, varw1, var5, var6, label1, matvar, matvar1) are not used by
// the statements visible here; they are presumably referenced by name from MAKE_RESNET50 — confirm
// before renaming or removing any of them.
| bool resnet50(Graph &graph) { | |||||
// Two Data ops (index 0 and 1); only the first gets an explicit output description.
| auto data = op::Data().set_attr_index(0); | |||||
| auto data1 = op::Data().set_attr_index(1); | |||||
| TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); | |||||
| data.update_output_desc_y(shape_desc); | |||||
// 64x3x7x7 filter variables for the first convolution.
| TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); | |||||
| auto var = op::Variable("conv2d_var"); | |||||
| var.update_output_desc_y(desc); | |||||
| var.update_input_desc_x(desc); | |||||
| auto varw1 = op::Variable("conv2d_varw1"); | |||||
| varw1.update_output_desc_y(desc); | |||||
| auto conv2d = op::Conv2D("Translate") | |||||
| .set_input_x(data) | |||||
| .set_input_filter(var) | |||||
| .set_attr_strides({1, 1, 2, 2}) | |||||
| .set_attr_pads({2, 3, 2, 3}) | |||||
| .set_attr_data_format("NCHW"); | |||||
// Only the format of the conv output is set; the shape is left to be inferred.
| TensorDesc desc_y; | |||||
| desc_y.SetFormat(FORMAT_NCHW); // shape: 32 64 112 112 | |||||
| conv2d.update_output_desc_y(desc_y); | |||||
// 1x64x1x1 variables fed to the batch-norm op as scale, b, mean and variance.
| TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); | |||||
| auto var1 = op::Variable("bn_var1"); | |||||
| var1.update_output_desc_y(desc1); | |||||
| auto var2 = op::Variable("bn_var2"); | |||||
| var2.update_output_desc_y(desc1); | |||||
| auto var3 = op::Variable("bn_var3"); | |||||
| var3.update_output_desc_y(desc1); | |||||
| auto var4 = op::Variable("bn_var4"); | |||||
| var4.update_output_desc_y(desc1); | |||||
| TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); | |||||
| auto var5 = op::Variable("var5"); | |||||
| var5.update_output_desc_y(desc2); | |||||
| auto var6 = op::Variable("var6"); | |||||
| var6.update_output_desc_y(desc2); | |||||
| TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); | |||||
| auto label1 = op::Variable("label1"); | |||||
| label1.update_output_desc_y(desclabel); | |||||
| TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); | |||||
| auto matvar = op::Variable("matvar"); | |||||
| matvar.update_output_desc_y(descmatlabel); | |||||
| auto matvar1 = op::Variable("matvar1"); | |||||
| matvar1.update_output_desc_y(descmatlabel); | |||||
// Stem: batch norm on the conv output, then ReLU, then max pooling with argmax.
| auto bn = op::FusedBatchNorm() | |||||
| .set_input_x(conv2d, "y") | |||||
| .set_input_scale(var1) | |||||
| .set_input_b(var2) | |||||
| .set_input_mean(var3) | |||||
| .set_input_variance(var4) | |||||
| .set_attr_mode(1) | |||||
| .set_attr_epsilon(1e-5) | |||||
| .set_attr_is_training(true) | |||||
| .set_attr_is_training_fusion(true) | |||||
| .set_attr_moving_average_fraction(994352128); | |||||
| auto relu = op::Relu().set_input_x(bn, "y"); | |||||
| auto maxpool = op::MaxPoolWithArgmax() | |||||
| .set_input_x(relu, "y") | |||||
| .set_attr_ksize({1, 3, 3, 1}) | |||||
| .set_attr_padding("SAME") | |||||
| .set_attr_strides({1, 2, 2, 1}); | |||||
// The remaining layers are generated by a macro defined outside this part of the file.
| MAKE_RESNET50(maxpool); | |||||
| std::vector<Operator> inputs{data}; //,var,var1,layer1_blk1_bn1_b,var3,var4}; | |||||
| std::vector<Operator> outputs{}; | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| return true; | |||||
| } | |||||
// GENERATE_CONSTANT_USE_DESC(OPNUM, desc, val) declares in the expanding scope:
//   <OPNUM>_size     - element count of `desc` (kept as uint32_t, as before);
//   <OPNUM>_tensor   - Tensor described by `desc`; when the data type of `desc` is DT_FLOAT or
//                      DT_INT64 every element is set to `val`, otherwise no data is attached;
//   <OPNUM>_constant - op::Constant carrying that tensor, with its output description set to `desc`.
// The staging buffers are std::vector objects instead of new[]/delete[] pairs, so they are
// released on every exit path. They are destroyed right after SetData(), exactly where the
// original code deleted its arrays.
// `desc` and `val` are parenthesized/cast once so that expression arguments expand safely.
#define GENERATE_CONSTANT_USE_DESC(OPNUM, desc, val)                                              \
  uint32_t OPNUM##_size = (desc).GetShape().GetShapeSize();                                       \
  Tensor OPNUM##_tensor;                                                                          \
  OPNUM##_tensor.SetTensorDesc(desc);                                                             \
  if ((desc).GetDataType() == DT_FLOAT) {                                                         \
    std::vector<float> OPNUM##_data(OPNUM##_size, static_cast<float>(val));                       \
    OPNUM##_tensor.SetData(reinterpret_cast<uint8_t *>(OPNUM##_data.data()),                      \
                           OPNUM##_data.size() * sizeof(float));                                  \
  }                                                                                               \
  if ((desc).GetDataType() == DT_INT64) {                                                         \
    std::vector<int64_t> OPNUM##_data(OPNUM##_size, static_cast<int64_t>(val));                   \
    OPNUM##_tensor.SetData(reinterpret_cast<uint8_t *>(OPNUM##_data.data()),                      \
                           OPNUM##_data.size() * sizeof(int64_t));                                \
  }                                                                                               \
  auto OPNUM##_constant = op::Constant().set_attr_value(OPNUM##_tensor);                          \
  OPNUM##_constant.update_output_desc_y(desc);
// GENERATE_VAR_LAYER declares `<OPNUM>_weight`, an op::Variable named after the stringified
// OPNUM with output description `desc`, and `<OPNUM>_assign`, an op::Assign that writes
// `<OPNUM>_constant` into it. `<OPNUM>_constant` must already be in scope (it is what
// GENERATE_CONSTANT_USE_DESC declares). The variable is then appended to the container `input`.
| #define GENERATE_VAR_LAYER(OPNUM, desc, input) \ | |||||
| auto OPNUM##_weight = op::Variable(string(#OPNUM)); \ | |||||
| OPNUM##_weight.update_output_desc_y(desc); \ | |||||
| auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ | |||||
| \ | |||||
| input.push_back(OPNUM##_weight); | |||||
// GENERATE_VAR_LAYER_1 is the same as GENERATE_VAR_LAYER except that the op::Variable is named
// by the explicit `name` argument instead of the stringified OPNUM.
// The `var_format` parameter is accepted but not referenced anywhere in the body.
| #define GENERATE_VAR_LAYER_1(OPNUM, desc, var_format, input, name) \ | |||||
| auto OPNUM##_weight = op::Variable(string(name)); \ | |||||
| OPNUM##_weight.update_output_desc_y(desc); \ | |||||
| auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \ | |||||
| \ | |||||
| input.push_back(OPNUM##_weight); | |||||
// Builds the variable-initialisation graph into `graph`.
// For every variable, GENERATE_CONSTANT_USE_DESC creates a constant filled with one value and
// GENERATE_VAR_LAYER / GENERATE_VAR_LAYER_1 creates the Variable, assigns the constant to it and
// appends the Variable to `inputs`. The Variable names and shapes here match the ones declared
// in resnet50(). The graph is given `inputs` as its inputs and an empty output list.
// Always returns 0.
| int BuildInitVarGraph(Graph &graph) { | |||||
| std::vector<Operator> inputs{}; | |||||
| std::vector<Operator> outputs{}; | |||||
// First-convolution filters, filled with 0.01.
| TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT); | |||||
| GENERATE_CONSTANT_USE_DESC(conv2d_var, desc, 0.01); | |||||
| GENERATE_VAR_LAYER(conv2d_var, desc, inputs); | |||||
| GENERATE_CONSTANT_USE_DESC(conv2d_varw1, desc, 0.01); | |||||
| GENERATE_VAR_LAYER(conv2d_varw1, desc, inputs); | |||||
// Four 1x64x1x1 batch-norm variables, filled with 0.01.
| TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT); | |||||
| GENERATE_CONSTANT_USE_DESC(bn_var1, desc1, 0.01); | |||||
| GENERATE_VAR_LAYER(bn_var1, desc1, inputs); | |||||
| GENERATE_CONSTANT_USE_DESC(bn_var2, desc1, 0.01); | |||||
| GENERATE_VAR_LAYER(bn_var2, desc1, inputs); | |||||
| GENERATE_CONSTANT_USE_DESC(bn_var3, desc1, 0.01); | |||||
| GENERATE_VAR_LAYER(bn_var3, desc1, inputs); | |||||
| GENERATE_CONSTANT_USE_DESC(bn_var4, desc1, 0.01); | |||||
| GENERATE_VAR_LAYER(bn_var4, desc1, inputs); | |||||
| TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT); | |||||
| GENERATE_CONSTANT_USE_DESC(var5, desc2, 0.01); | |||||
| GENERATE_VAR_LAYER(var5, desc2, inputs); | |||||
| GENERATE_CONSTANT_USE_DESC(var6, desc2, 0.01); | |||||
| GENERATE_VAR_LAYER(var6, desc2, inputs); | |||||
// label1 is the only float variable here that is filled with 0.1 instead of 0.01.
| TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); | |||||
| GENERATE_CONSTANT_USE_DESC(label1, desclabel, 0.1); | |||||
| GENERATE_VAR_LAYER(label1, desclabel, inputs); | |||||
| TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT); | |||||
| GENERATE_CONSTANT_USE_DESC(matvar, descmatlabel, 0.01); | |||||
| GENERATE_VAR_LAYER(matvar, descmatlabel, inputs); | |||||
| GENERATE_CONSTANT_USE_DESC(matvar1, descmatlabel, 0.01); | |||||
| GENERATE_VAR_LAYER(matvar1, descmatlabel, inputs); | |||||
// Per-layer variables come from a macro defined outside this part of the file.
| MAKE_RESNET50_VAR(inputs); | |||||
// Scalar (1x1x1x1) DT_INT64 variables under the "npu_runconfig/" name prefix:
// iterations_per_loop = 100, loop_cond = 0, one = 1, zero = 0.
| TensorDesc ctrl(ge::Shape({1, 1, 1, 1}), FORMAT_NCHW, DT_INT64); | |||||
| GENERATE_CONSTANT_USE_DESC(iterations_per_loop, ctrl, 100); | |||||
| GENERATE_VAR_LAYER_1(iterations_per_loop, ctrl, "4D", inputs, "npu_runconfig/iterations_per_loop"); | |||||
| GENERATE_CONSTANT_USE_DESC(loop_cond, ctrl, 0); | |||||
| GENERATE_VAR_LAYER_1(loop_cond, ctrl, "4D", inputs, "npu_runconfig/loop_cond"); | |||||
| GENERATE_CONSTANT_USE_DESC(one, ctrl, 1); | |||||
| GENERATE_VAR_LAYER_1(one, ctrl, "4D", inputs, "npu_runconfig/one"); | |||||
| GENERATE_CONSTANT_USE_DESC(zero, ctrl, 0); | |||||
| GENERATE_VAR_LAYER_1(zero, ctrl, "4D", inputs, "npu_runconfig/zero"); | |||||
| graph.SetInputs(inputs).SetOutputs(outputs); | |||||
| return 0; | |||||
| } | |||||
| int TestBuildGraphTest(Func fun, Graph &graph, vector<ge::Tensor> &inputs, vector<ge::Tensor> &outputs) { | |||||
| bool graph_ret = fun(graph); | |||||
| ge::Tensor shapeTensor; | |||||
| TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT); | |||||
| uint32_t sizeshape = shape_desc.GetShape().GetShapeSize(); | |||||
| printf("[test] desc size filter shape:%u\n", sizeshape); | |||||
| shapeTensor.SetTensorDesc(shape_desc); | |||||
| vector<float> dataValuec; | |||||
| for (int i = 0; i < sizeshape; i++) { | |||||
| dataValuec.push_back(1); | |||||
| } | |||||
| shapeTensor.SetData((uint8_t *)dataValuec.data(), 4 * sizeshape); | |||||
| inputs.push_back(shapeTensor); | |||||
| ge::Tensor shapeTensor1; | |||||
| TensorDesc shape_desc1(ge::Shape({1, 32, 1, 1}), FORMAT_NCHW, DT_FLOAT); | |||||
| uint32_t sizeshape1 = shape_desc1.GetShape().GetShapeSize(); | |||||
| printf("[test] desc size filter shape:%u\n", sizeshape1); | |||||
| shapeTensor1.SetTensorDesc(shape_desc1); | |||||
| vector<int32_t> dataValuec1; | |||||
| for (int i = 0; i < sizeshape1; i++) { | |||||
| dataValuec1.push_back(1); | |||||
| } | |||||
| shapeTensor1.SetData((uint8_t *)dataValuec1.data(), 4 * sizeshape1); | |||||
| return 0; | |||||
| } | |||||
| int runTrainGraph(Func fun, int loopCount) { | |||||
| printf("GE BBIT begin...\n"); | |||||
| std::chrono::system_clock::time_point start = std::chrono::system_clock::now(); | |||||
| std::map<std::string, std::string> ge_options = { | |||||
| {"device_id", "0"}, {"rank_table_file", ""}, {"graphType", "1"}, {"ge.graphRunMode", "2"}}; | |||||
| std::map<std::string, std::string> session_options = {{"a", "b"}, {TRAIN_FLAG, "1"}}; | |||||
| ge::Status ret; | |||||
| // init ge | |||||
| ret = GEInitialize_api_new("train", "fe,plugin"); | |||||
| printf("ge::GEInitialize ret:%d\n", ret); | |||||
| // init session | |||||
| ge::Session session(session_options); | |||||
| int graphId_initvar = 1; | |||||
| ge::Graph graph_initvar("initVarGraph"); | |||||
| bool graph_ret = BuildInitVarGraph(graph_initvar); | |||||
| // session addgraph | |||||
| int graphId = 0; | |||||
| // build graph | |||||
| ge::Graph graph("bigGraph"); | |||||
| std::vector<ge::Tensor> inputs; | |||||
| ge::Tensor outputTensor; | |||||
| std::vector<ge::Tensor> outputs; | |||||
| graph_ret = TestBuildGraphTest(fun, graph, inputs, outputs); | |||||
| printf("TestReluGrad ret:%d\n", graph_ret); | |||||
| ret = session.AddGraph(graphId_initvar, graph_initvar); | |||||
| printf("session.AddVarGraph ret:%d\n", ret); | |||||
| if (ret) return ret; | |||||
| ret = session.AddGraph(graphId, graph); | |||||
| printf("session.AddGraph ret:%d\n", ret); | |||||
| if (ret) return ret; | |||||
| std::vector<ge::Tensor> inputs1; | |||||
| std::vector<ge::Tensor> outputs1; | |||||
| ret = session.RunGraph(graphId_initvar, inputs1, outputs1); | |||||
| if (ret != SUCCESS) { | |||||
| return ret; | |||||
| } | |||||
| // add loop for test of stabilty: | |||||
| for (int i = 0; i < loopCount; i++) { | |||||
| // session rungraph | |||||
| printf("loopCount:%d\n", loopCount); | |||||
| ret = session.RunGraph(graphId, inputs, outputs); | |||||
| printf("session.RunGraph ret:%d\n", ret); | |||||
| if (ret) return ret; | |||||
| // define 99999 as loop forever | |||||
| if (loopCount == 99999) i = 0; | |||||
| } | |||||
| std::chrono::system_clock::time_point end = std::chrono::system_clock::now(); | |||||
| auto millisecondsduration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start); | |||||
| auto ms = millisecondsduration.count(); | |||||
| std::stringstream ss; | |||||
| ss << ms << "ms"; | |||||
| std::string run_time = ss.str(); | |||||
| printf("run time is : %s \n", run_time.c_str()); | |||||
| return 0; | |||||
| } | |||||
| int main(int argc, char *argv[]) { | |||||
| // add loop for test of stabilty: | |||||
| int loopCount = 1; | |||||
| if (argc >= 2) loopCount = atoi(argv[1]); | |||||
| Status ret = SUCCESS; | |||||
| ret = runTrainGraph(resnet50, loopCount); | |||||
| if (ret == SUCCESS) { | |||||
| std::cout << "[train resnet50 success]" << std::endl; | |||||
| } else { | |||||
| std::cout << "!!! train resnet50 fail !!!" << std::endl; | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| @@ -1,56 +0,0 @@ | |||||
| # Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| ge st test. | |||||
| """ | |||||
| import pytest | |||||
| import subprocess | |||||
| import os | |||||
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_card
@pytest.mark.component_ge
def test_resnet50_train():
    """Run the ``st_resnet50_train`` binary and require a zero exit status.

    Environment variables read:
      GE_ST_DIR        directory that contains ``st_resnet50_train``.
      GRAPHENGINE_LIB  directory that contains ``libge_local_engine.so``.
      REAL_PYTHONPATH  if set, it is prepended to ``PYTHONPATH``.
      PYTHONPATH, LD_LIBRARY_PATH  only reported in the log.

    Environment variables set for the child process:
      PYTHONPATH (see above), ASCEND_OPP_PATH, ASCEND_ENGINE_PATH, SLOG_PRINT_TO_STDOUT.
    """
    ge_st_dir = os.environ.get('GE_ST_DIR',
                               '/home/jenkins/workspace/release_pkg/gate/graphengine_lib')
    ge_lib_dir = os.environ.get('GRAPHENGINE_LIB', '/home/jenkins/workspace/release_pkg/gate/graphengine_lib')

    real_pythonpath = os.environ.get('REAL_PYTHONPATH')
    pythonpath = os.environ.get('PYTHONPATH')
    if real_pythonpath:
        if pythonpath:
            os.environ['PYTHONPATH'] = real_pythonpath + ':' + pythonpath
        else:
            os.environ['PYTHONPATH'] = real_pythonpath
    # An unset variable is logged as an empty string; concatenating None would raise TypeError.
    print('PYTHONPATH: ' + os.environ.get('PYTHONPATH', ''))

    os.environ['ASCEND_OPP_PATH'] = '/usr/local/Ascend/opp'
    os.environ['ASCEND_ENGINE_PATH'] = '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:' \
                                       '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:' \
                                       '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so:' + \
                                       ge_lib_dir + '/libge_local_engine.so'
    print('ASCEND_OPP_PATH: ' + os.environ.get('ASCEND_OPP_PATH'))
    print('ASCEND_ENGINE_PATH: ' + os.environ.get('ASCEND_ENGINE_PATH'))
    print('LD_LIBRARY_PATH: ' + os.environ.get('LD_LIBRARY_PATH', ''))

    cmd = ge_st_dir + '/st_resnet50_train'
    print('cmd: ' + cmd)
    os.environ['SLOG_PRINT_TO_STDOUT'] = "1"
    # Run the binary directly: no shell is needed, and the path is then never shell-interpreted.
    ret = subprocess.call([cmd])
    assert ret == 0