| @@ -77,6 +77,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { | |||||
| /// | /// | ||||
| Status RunGraph(uint32_t graphId, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs); | Status RunGraph(uint32_t graphId, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs); | ||||
| /// | |||||
| /// @ingroup ge_graph | |||||
| /// @brief build graph in the session with specific session id | |||||
| /// @param [in] graphId: graph id | |||||
| /// @param [in] inputs: input data | |||||
| /// @return Status result of function | |||||
| /// | |||||
| Status BuildGraph(uint32_t graphId, const std::vector<InputTensorInfo> &inputs); | |||||
| /// | /// | ||||
| /// @ingroup ge_graph | /// @ingroup ge_graph | ||||
| /// @brief run graph in the session with specific session id asynchronously | /// @brief run graph in the session with specific session id asynchronously | ||||
| @@ -157,6 +157,9 @@ const std::string OUTPUT_DATATYPE = "ge.outputDatatype"; | |||||
| // congigure opSelectImplmode to setting op select implmode | // congigure opSelectImplmode to setting op select implmode | ||||
| const std::string OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; | const std::string OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; | ||||
| // congigure optypelist_for_implmode to setting which op use implmode | |||||
| const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||||
| // configure whether to enable hcom parallel by session constructor options param, | // configure whether to enable hcom parallel by session constructor options param, | ||||
| // its value should be "0" or "1", default value is "0" | // its value should be "0" or "1", default value is "0" | ||||
| const std::string HCOM_PARALLEL = "ge.hcomParallel"; | const std::string HCOM_PARALLEL = "ge.hcomParallel"; | ||||
| @@ -258,12 +261,12 @@ using RunAsyncCallback = std::function<void(Status, std::vector<ge::OutputTensor | |||||
| namespace ir_option { | namespace ir_option { | ||||
| static const char *const INPUT_FORMAT = "input_format"; | static const char *const INPUT_FORMAT = "input_format"; | ||||
| static const char *const INPUT_SHAPE = "input_shape"; | static const char *const INPUT_SHAPE = "input_shape"; | ||||
| static const char *const OP_NAME_MAP = "op_name_map"; | |||||
| static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | ||||
| static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | ||||
| static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | ||||
| static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | ||||
| static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | ||||
| static const char *const HEAD_STREAM = ge::HEAD_STREAM.c_str(); | |||||
| static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); | static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); | ||||
| static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); | static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); | ||||
| static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); | static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); | ||||
| @@ -280,16 +283,20 @@ static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; | |||||
| static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | ||||
| static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | ||||
| static const char *const LOG_LEVEL = "log"; | static const char *const LOG_LEVEL = "log"; | ||||
| static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); | |||||
| // for interface: aclgrphBuildModel | // for interface: aclgrphBuildModel | ||||
| const std::set<std::string> ir_builder_suppported_options = { | const std::set<std::string> ir_builder_suppported_options = { | ||||
| INPUT_FORMAT, INPUT_SHAPE, DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, | |||||
| INSERT_OP_FILE, OUTPUT_TYPE, BUFFER_OPTIMIZE, ENABLE_COMPRESS_WEIGHT, | |||||
| COMPRESS_WEIGHT_CONF, OUT_NODES, INPUT_FP16_NODES, LOG_LEVEL}; | |||||
| INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE, | |||||
| DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, | |||||
| AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES, | |||||
| LOG_LEVEL}; | |||||
| // for interface: aclgrphBuildInitialize | // for interface: aclgrphBuildInitialize | ||||
| const std::set<std::string> global_options = {HEAD_STREAM, | |||||
| CORE_TYPE, | |||||
| const std::set<std::string> global_options = {CORE_TYPE, | |||||
| SOC_VERSION, | SOC_VERSION, | ||||
| BUFFER_OPTIMIZE, | |||||
| ENABLE_COMPRESS_WEIGHT, | |||||
| COMPRESS_WEIGHT_CONF, | |||||
| PRECISION_MODE, | PRECISION_MODE, | ||||
| EXEC_DISABLE_REUSED_MEMORY, | EXEC_DISABLE_REUSED_MEMORY, | ||||
| AUTO_TUNE_MODE, | AUTO_TUNE_MODE, | ||||
| @@ -298,7 +305,8 @@ const std::set<std::string> global_options = {HEAD_STREAM, | |||||
| FUSION_SWITCH_FILE, | FUSION_SWITCH_FILE, | ||||
| ENABLE_SMALL_CHANNEL, | ENABLE_SMALL_CHANNEL, | ||||
| QUANT_OPTIMIZE, | QUANT_OPTIMIZE, | ||||
| OP_SELECT_IMPL_MODE}; | |||||
| OP_SELECT_IMPL_MODE, | |||||
| OPTYPELIST_FOR_IMPLMODE}; | |||||
| } // namespace ir_option | } // namespace ir_option | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -143,6 +143,7 @@ enum Format { | |||||
| FORMAT_DHWNC, | FORMAT_DHWNC, | ||||
| FORMAT_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format | FORMAT_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format | ||||
| FORMAT_FRACTAL_ZN_LSTM, | FORMAT_FRACTAL_ZN_LSTM, | ||||
| FORMAT_FRACTAL_Z_G, | |||||
| FORMAT_RESERVED, | FORMAT_RESERVED, | ||||
| FORMAT_ALL | FORMAT_ALL | ||||
| }; | }; | ||||
| @@ -25,11 +25,12 @@ namespace domi { | |||||
| /// @brief AI framework types | /// @brief AI framework types | ||||
| /// | /// | ||||
| enum FrameworkType { | enum FrameworkType { | ||||
| FMK_TYPE_C = 0, | |||||
| FMK_TYPE_MINDSPORE = 1, | |||||
| FMK_TYPE_T = 3, | |||||
| FMK_TYPE_A_NN, | |||||
| FMK_TYPE_RESERVED, | |||||
| CAFFE = 0, | |||||
| MINDSPORE = 1, | |||||
| TENSORFLOW = 3, | |||||
| ANDROID_NN, | |||||
| ONNX, | |||||
| FRAMEWORK_RESERVED, | |||||
| }; | }; | ||||
| } // namespace domi | } // namespace domi | ||||
| @@ -231,7 +231,7 @@ using cce::ccStatus_t; | |||||
| DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | ||||
| exec_expr; \ | exec_expr; \ | ||||
| } \ | } \ | ||||
| } | |||||
| }; | |||||
| // If expr is not RT_ERROR_NONE, print the log and return | // If expr is not RT_ERROR_NONE, print the log and return | ||||
| #define GE_CHK_RT_RET(expr) \ | #define GE_CHK_RT_RET(expr) \ | ||||
| @@ -259,7 +259,7 @@ using cce::ccStatus_t; | |||||
| if (expr) { \ | if (expr) { \ | ||||
| exec_expr; \ | exec_expr; \ | ||||
| } \ | } \ | ||||
| } | |||||
| }; | |||||
| // If make_shared is abnormal, print the log and execute the statement | // If make_shared is abnormal, print the log and execute the statement | ||||
| #define GE_MAKE_SHARED(exec_expr0, exec_expr1) \ | #define GE_MAKE_SHARED(exec_expr0, exec_expr1) \ | ||||
| @@ -280,6 +280,8 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r | |||||
| // Executor module error code definition | // Executor module error code definition | ||||
| GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); | GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); | ||||
| GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist."); | |||||
| GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily."); | |||||
| // Generator module error code definition | // Generator module error code definition | ||||
| GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); | GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); | ||||
| @@ -33,11 +33,11 @@ enum RuntimeType { HOST = 0, DEVICE = 1 }; | |||||
| enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; | enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; | ||||
| enum FrameworkType { | enum FrameworkType { | ||||
| FMK_TYPE_C = 0, | |||||
| FMK_TYPE_MINDSPORE = 1, | |||||
| FMK_TYPE_T = 3, | |||||
| FMK_TYPE_A_NN, | |||||
| FMK_TYPE_RESERVED, | |||||
| CAFFE = 0, | |||||
| MINDSPORE = 1, | |||||
| TENSORFLOW = 3, | |||||
| ANDROID_NN, | |||||
| FRAMEWORK_RESERVED, | |||||
| }; | }; | ||||
| enum OpEngineType { | enum OpEngineType { | ||||
| @@ -111,6 +111,72 @@ struct InputOutputDescInfo { | |||||
| ShapeDescription shape_info; | ShapeDescription shape_info; | ||||
| }; | }; | ||||
| // Definition of model io dims | |||||
| struct InputOutputDims { | |||||
| std::string name; | |||||
| size_t dim_num; | |||||
| uint32_t size; | |||||
| std::vector<int64_t> dims; | |||||
| }; | |||||
| // Definition of model io dims | |||||
| struct OriginInputInfo { | |||||
| Format format; | |||||
| DataType data_type; | |||||
| uint32_t dim_num; | |||||
| }; | |||||
| // The structure of AIPP info | |||||
| struct AippConfigInfo { | |||||
| int8_t input_format; | |||||
| int32_t src_image_size_w; | |||||
| int32_t src_image_size_h; | |||||
| int8_t crop; | |||||
| int32_t load_start_pos_w; | |||||
| int32_t load_start_pos_h; | |||||
| int32_t crop_size_w; | |||||
| int32_t crop_size_h; | |||||
| int8_t resize; | |||||
| int32_t resize_output_w; | |||||
| int32_t resize_output_h; | |||||
| int8_t padding; | |||||
| int32_t left_padding_size; | |||||
| int32_t right_padding_size; | |||||
| int32_t top_padding_size; | |||||
| int32_t bottom_padding_size; | |||||
| int8_t csc_switch; | |||||
| int8_t rbuv_swap_switch; | |||||
| int8_t ax_swap_switch; | |||||
| int8_t single_line_mode; | |||||
| int32_t matrix_r0c0; | |||||
| int32_t matrix_r0c1; | |||||
| int32_t matrix_r0c2; | |||||
| int32_t matrix_r1c0; | |||||
| int32_t matrix_r1c1; | |||||
| int32_t matrix_r1c2; | |||||
| int32_t matrix_r2c0; | |||||
| int32_t matrix_r2c1; | |||||
| int32_t matrix_r2c2; | |||||
| int32_t output_bias_0; | |||||
| int32_t output_bias_1; | |||||
| int32_t output_bias_2; | |||||
| int32_t input_bias_0; | |||||
| int32_t input_bias_1; | |||||
| int32_t input_bias_2; | |||||
| int32_t mean_chn_0; | |||||
| int32_t mean_chn_1; | |||||
| int32_t mean_chn_2; | |||||
| int32_t mean_chn_3; | |||||
| float min_chn_0; | |||||
| float min_chn_1; | |||||
| float min_chn_2; | |||||
| float min_chn_3; | |||||
| float var_reci_chn_0; | |||||
| float var_reci_chn_1; | |||||
| float var_reci_chn_2; | |||||
| float var_reci_chn_3; | |||||
| }; | |||||
| // The structure of offline Modeldata | // The structure of offline Modeldata | ||||
| struct ModelData { | struct ModelData { | ||||
| void *model_data = nullptr; // Model binary data start addr | void *model_data = nullptr; // Model binary data start addr | ||||
| @@ -59,15 +59,14 @@ class OmFileLoadHelper { | |||||
| Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); | Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); | ||||
| OmFileContext context_; | |||||
| private: | private: | ||||
| Status CheckModelValid(const ge::ModelData &model) const; | Status CheckModelValid(const ge::ModelData &model) const; | ||||
| Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); | Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); | ||||
| bool is_inited_{false}; | bool is_inited_{false}; | ||||
| public: | |||||
| OmFileContext context_; | |||||
| }; | }; | ||||
| class OmFileSaveHelper { | class OmFileSaveHelper { | ||||
| @@ -160,6 +160,7 @@ REGISTER_OPTYPE_DECLARE(SLICE, "Slice"); | |||||
| REGISTER_OPTYPE_DECLARE(SLICED, "SliceD"); | REGISTER_OPTYPE_DECLARE(SLICED, "SliceD"); | ||||
| REGISTER_OPTYPE_DECLARE(FLOORDIV, "FloorDiv"); | REGISTER_OPTYPE_DECLARE(FLOORDIV, "FloorDiv"); | ||||
| REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); | REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); | ||||
| REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); | |||||
| REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); | REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); | ||||
| REGISTER_OPTYPE_DECLARE(RANGE, "Range"); | REGISTER_OPTYPE_DECLARE(RANGE, "Range"); | ||||
| REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); | REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); | ||||
| @@ -96,6 +96,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| /// | /// | ||||
| ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ||||
| ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Set dynamic image info | /// @brief Set dynamic image info | ||||
| @@ -110,6 +112,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| const std::vector<kAippDynamicBatchPara> &aippBatchPara, | const std::vector<kAippDynamicBatchPara> &aippBatchPara, | ||||
| const kAippDynamicPara &aippParms); | const kAippDynamicPara &aippParms); | ||||
| ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||||
| ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | |||||
| ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
| std::vector<ge::TensorDesc> &output_desc); | std::vector<ge::TensorDesc> &output_desc); | ||||
| @@ -206,6 +211,11 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
| static ge::Status ReleaseSingleOpResource(void *stream); | static ge::Status ReleaseSingleOpResource(void *stream); | ||||
| ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | |||||
| ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||||
| ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims); | |||||
| private: | private: | ||||
| static bool isInit_; | static bool isInit_; | ||||
| }; | }; | ||||
| @@ -1,113 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ | |||||
| #define INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include "ge_runtime/op_info.h" | |||||
| #include "ge_runtime/task_info.h" | |||||
| namespace ge { | |||||
| namespace model_runner { | |||||
| class DavinciModel { | |||||
| public: | |||||
| DavinciModel(const std::vector<std::shared_ptr<TaskInfo>> &task_info_list, | |||||
| const std::vector<std::shared_ptr<OpInfo>> &data_info_list, | |||||
| const std::vector<std::shared_ptr<OpInfo>> &output_info_list, | |||||
| const std::vector<std::shared_ptr<OpInfo>> &constant_info_list, | |||||
| const std::vector<model_runner::OpInfoPtr> &variable_info_list, | |||||
| const std::vector<uint32_t> &wait_active_stream_list, | |||||
| const std::vector<uint32_t> &force_copy_stream_list, uint64_t mem_size = 0, uint64_t weight_size = 0, | |||||
| uint64_t var_size = 0, uintptr_t logic_mem_base = 0, uintptr_t logic_weight_base = 0, | |||||
| uintptr_t logic_var_base = 0, uint32_t stream_num = 0, uint32_t batch_num = 0, uint32_t event_num = 0, | |||||
| int32_t priority = 0) | |||||
| : task_info_list_(task_info_list), | |||||
| data_info_list_(data_info_list), | |||||
| output_info_list_(output_info_list), | |||||
| constant_info_list_(constant_info_list), | |||||
| variable_info_list_(variable_info_list), | |||||
| wait_active_stream_list_(wait_active_stream_list), | |||||
| force_copy_stream_list_(force_copy_stream_list), | |||||
| mem_size_(mem_size), | |||||
| weight_size_(weight_size), | |||||
| var_size_(var_size), | |||||
| logic_mem_base_(logic_mem_base), | |||||
| logic_weight_base_(logic_weight_base), | |||||
| logic_var_base_(logic_var_base), | |||||
| stream_num_(stream_num), | |||||
| batch_num_(batch_num), | |||||
| event_num_(event_num), | |||||
| priority_(priority) {} | |||||
| ~DavinciModel() {} | |||||
| uint64_t GetMemSize() const { return mem_size_; } | |||||
| uint64_t GetWeightSize() const { return weight_size_; } | |||||
| uint64_t GetVarSize() const { return var_size_; } | |||||
| uintptr_t GetLogicMemBase() const { return logic_mem_base_; } | |||||
| uintptr_t GetLogicWeightBase() const { return logic_weight_base_; } | |||||
| uintptr_t GetLogicVarBase() const { return logic_var_base_; } | |||||
| uint32_t GetStreamNum() const { return stream_num_; } | |||||
| uint32_t GetBatchNum() const { return batch_num_; } | |||||
| uint32_t GetEventNum() const { return event_num_; } | |||||
| const std::vector<uint32_t> &GetWaitActiveStreams() const { return wait_active_stream_list_; } | |||||
| const std::vector<uint32_t> &GetForceCopyStreams() const { return force_copy_stream_list_; } | |||||
| int32_t GetPriority() const { return priority_; } | |||||
| const std::vector<std::shared_ptr<TaskInfo>> &GetTaskInfoList() const { return task_info_list_; } | |||||
| const std::vector<std::shared_ptr<OpInfo>> &GetDataInfoList() const { return data_info_list_; } | |||||
| const std::vector<std::shared_ptr<OpInfo>> &GetOutputInfoList() const { return output_info_list_; } | |||||
| const std::vector<std::shared_ptr<OpInfo>> &GetConstantInfoList() const { return output_info_list_; } | |||||
| const std::vector<model_runner::OpInfoPtr> &GetVariableInfoList() const { return variable_info_list_; } | |||||
| private: | |||||
| std::vector<std::shared_ptr<TaskInfo>> task_info_list_; | |||||
| std::vector<std::shared_ptr<OpInfo>> data_info_list_; | |||||
| std::vector<std::shared_ptr<OpInfo>> output_info_list_; | |||||
| std::vector<std::shared_ptr<OpInfo>> constant_info_list_; | |||||
| std::vector<model_runner::OpInfoPtr> variable_info_list_; | |||||
| std::vector<uint32_t> wait_active_stream_list_; | |||||
| std::vector<uint32_t> force_copy_stream_list_; | |||||
| uint64_t mem_size_; | |||||
| uint64_t weight_size_; | |||||
| uint64_t var_size_; | |||||
| uintptr_t logic_mem_base_; | |||||
| uintptr_t logic_weight_base_; | |||||
| uintptr_t logic_var_base_; | |||||
| uint32_t stream_num_; | |||||
| uint32_t batch_num_; | |||||
| uint32_t event_num_; | |||||
| int32_t priority_; | |||||
| // Disable to copy constructor and assignment operator | |||||
| DavinciModel &operator=(const DavinciModel &) = delete; | |||||
| DavinciModel(const DavinciModel &) = delete; | |||||
| }; | |||||
| } // namespace model_runner | |||||
| } // namespace ge | |||||
| #endif // INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ | |||||
| @@ -1,58 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ | |||||
| #define INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ | |||||
| #include <memory> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "common/ge_types.h" | |||||
| #include "ge_runtime/davinci_model.h" | |||||
| namespace ge { | |||||
| namespace model_runner { | |||||
| class RuntimeModel; | |||||
| class ModelRunner { | |||||
| public: | |||||
| static ModelRunner &Instance(); | |||||
| bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id, | |||||
| std::shared_ptr<DavinciModel> davinci_model, std::shared_ptr<ModelListener> listener); | |||||
| const std::vector<uint32_t> &GetTaskIdList(uint32_t model_id) const; | |||||
| bool UnloadModel(uint32_t model_id); | |||||
| bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); | |||||
| bool GetInputOutputDescInfo(uint32_t model_id, bool zero_copy, std::vector<InputOutputDescInfo> *input_desc, | |||||
| std::vector<InputOutputDescInfo> *output_desc, std::vector<uint32_t> *input_format, | |||||
| std::vector<uint32_t> *output_format); | |||||
| private: | |||||
| ModelRunner() = default; | |||||
| ~ModelRunner() = default; | |||||
| std::unordered_map<uint32_t, std::shared_ptr<RuntimeModel>> runtime_models_; | |||||
| }; | |||||
| } // namespace model_runner | |||||
| } // namespace ge | |||||
| #endif // INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ | |||||
| @@ -1,72 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ | |||||
| #define INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| namespace ge { | |||||
| namespace model_runner { | |||||
| struct TensorInfo { | |||||
| int64_t GetShapeSize() const { | |||||
| int64_t res = 1; | |||||
| if (dims.empty()) { | |||||
| return 0; | |||||
| } | |||||
| for (auto dim : dims) { | |||||
| res *= dim; | |||||
| } | |||||
| return res; | |||||
| } | |||||
| int64_t GetDim(uint32_t index) { | |||||
| if (index >= dims.size()) { | |||||
| return 0; | |||||
| } | |||||
| return dims[index]; | |||||
| } | |||||
| std::vector<int64_t> dims; | |||||
| uint32_t datatype; | |||||
| uint32_t format; | |||||
| uint32_t real_dim_cnt; | |||||
| uint32_t size; | |||||
| bool is_output; | |||||
| }; | |||||
| struct OpInfo { | |||||
| uint32_t index; | |||||
| std::string name; | |||||
| std::string type; | |||||
| bool var_is_broadcast; | |||||
| std::vector<uintptr_t> input_addrs; | |||||
| std::vector<uintptr_t> output_addrs; | |||||
| std::vector<TensorInfo> input_tensors; | |||||
| std::vector<TensorInfo> output_tensors; | |||||
| std::vector<TensorInfo> weight_tensors; | |||||
| std::vector<std::string> src_name; | |||||
| std::vector<int64_t> src_index; | |||||
| std::string weight_data; | |||||
| }; | |||||
| using TensorInfoPtr = std::shared_ptr<TensorInfo>; | |||||
| using OpInfoPtr = std::shared_ptr<OpInfo>; | |||||
| } // namespace model_runner | |||||
| } // namespace ge | |||||
| #endif // INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ | |||||
| @@ -1,394 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ | |||||
| #define INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ | |||||
| #include <stdint.h> | |||||
| #include <functional> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "cce/taskdown_api.h" | |||||
| namespace ge { | |||||
| namespace model_runner { | |||||
| enum TaskInfoType { | |||||
| CCE = 0, | |||||
| TBE, | |||||
| AICPU, | |||||
| LABEL_SET, | |||||
| LABEL_SWITCH, | |||||
| LABEL_GOTO, | |||||
| EVENT_RECORD, | |||||
| EVENT_WAIT, | |||||
| FUSION_START, | |||||
| FUSION_END, | |||||
| HCCL, | |||||
| PROFILER_TRACE, | |||||
| MEMCPY_ASYNC, | |||||
| STREAM_SWITCH, | |||||
| STREAM_ACTIVE, | |||||
| // Insert new task type here | |||||
| REVSERVED = 23 | |||||
| }; | |||||
| class TaskInfo { | |||||
| public: | |||||
| virtual ~TaskInfo() {} | |||||
| uint32_t stream_id() const { return stream_id_; } | |||||
| TaskInfoType type() const { return type_; } | |||||
| protected: | |||||
| TaskInfo(uint32_t stream_id, TaskInfoType type) : stream_id_(stream_id), type_(type) {} | |||||
| private: | |||||
| uint32_t stream_id_; | |||||
| TaskInfoType type_; | |||||
| }; | |||||
| class CceTaskInfo : public TaskInfo { | |||||
| public: | |||||
| CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim, | |||||
| const std::vector<uint8_t> &args, uint32_t args_size, const std::vector<uint8_t> &sm_desc, | |||||
| const std::vector<uint8_t> &flow_table, const std::vector<uint8_t> &args_offset, bool is_flowtable) | |||||
| : TaskInfo(stream_id, TaskInfoType::CCE), | |||||
| ctx_(ctx), | |||||
| stub_func_(stub_func), | |||||
| block_dim_(block_dim), | |||||
| args_(args), | |||||
| args_size_(args_size), | |||||
| sm_desc_(sm_desc), | |||||
| flow_table_(flow_table), | |||||
| args_offset_(args_offset), | |||||
| is_flowtable_(is_flowtable) {} | |||||
| ~CceTaskInfo() override {} | |||||
| cce::ccOpContext cc_context() const { return ctx_; } | |||||
| std::string stub_func() const { return stub_func_; } | |||||
| uint32_t block_dim() const { return block_dim_; } | |||||
| const std::vector<uint8_t> &args() const { return args_; } | |||||
| uint32_t args_size() const { return args_size_; } | |||||
| const std::vector<uint8_t> &sm_desc() const { return sm_desc_; } | |||||
| const std::vector<uint8_t> &flow_table() const { return flow_table_; } | |||||
| const std::vector<uint8_t> &args_offset() const { return args_offset_; } | |||||
| bool is_flowtable() const { return is_flowtable_; } | |||||
| private: | |||||
| cce::ccOpContext ctx_; | |||||
| std::string stub_func_; | |||||
| uint32_t block_dim_; | |||||
| std::vector<uint8_t> args_; | |||||
| uint32_t args_size_; | |||||
| std::vector<uint8_t> sm_desc_; | |||||
| std::vector<uint8_t> flow_table_; | |||||
| std::vector<uint8_t> args_offset_; | |||||
| bool is_flowtable_; | |||||
| }; | |||||
| class TbeTaskInfo : public TaskInfo { | |||||
| public: | |||||
| TbeTaskInfo(uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, const std::vector<uint8_t> &args, | |||||
| uint32_t args_size, const std::vector<uint8_t> &sm_desc, void *binary, uint32_t binary_size, | |||||
| const std::vector<uint8_t> &meta_data, const std::vector<void *> &input_data_addrs, | |||||
| const std::vector<void *> &output_data_addrs, const std::vector<void *> &workspace_addrs) | |||||
| : TaskInfo(stream_id, TaskInfoType::TBE), | |||||
| stub_func_(stub_func), | |||||
| block_dim_(block_dim), | |||||
| args_(args), | |||||
| args_size_(args_size), | |||||
| sm_desc_(sm_desc), | |||||
| binary_(binary), | |||||
| binary_size_(binary_size), | |||||
| meta_data_(meta_data), | |||||
| input_data_addrs_(input_data_addrs), | |||||
| output_data_addrs_(output_data_addrs), | |||||
| workspace_addrs_(workspace_addrs) {} | |||||
| ~TbeTaskInfo() override {} | |||||
| const std::string &stub_func() const { return stub_func_; } | |||||
| uint32_t block_dim() const { return block_dim_; } | |||||
| const std::vector<uint8_t> &args() const { return args_; } | |||||
| uint32_t args_size() const { return args_size_; } | |||||
| const std::vector<uint8_t> &sm_desc() const { return sm_desc_; } | |||||
| void *binary() const { return binary_; } | |||||
| uint32_t binary_size() const { return binary_size_; } | |||||
| const std::vector<uint8_t> &meta_data() const { return meta_data_; } | |||||
| const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; } | |||||
| const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; } | |||||
| const std::vector<void *> &workspace_addrs() const { return workspace_addrs_; } | |||||
| void SetBinary(void *binary, uint32_t binary_size) { | |||||
| binary_ = binary; | |||||
| binary_size_ = binary_size; | |||||
| } | |||||
| private: | |||||
| std::string stub_func_; | |||||
| uint32_t block_dim_; | |||||
| std::vector<uint8_t> args_; | |||||
| uint32_t args_size_; | |||||
| std::vector<uint8_t> sm_desc_; | |||||
| void *binary_; | |||||
| uint32_t binary_size_; | |||||
| std::vector<uint8_t> meta_data_; | |||||
| std::vector<void *> input_data_addrs_; | |||||
| std::vector<void *> output_data_addrs_; | |||||
| std::vector<void *> workspace_addrs_; | |||||
| }; | |||||
| class AicpuTaskInfo : public TaskInfo { | |||||
| public: | |||||
| AicpuTaskInfo(uint32_t stream_id, const string &so_name, const std::string &kernel_name, const std::string &node_def, | |||||
| const std::vector<void *> &input_data_addrs, const std::vector<void *> &output_data_addrs) | |||||
| : TaskInfo(stream_id, TaskInfoType::AICPU), | |||||
| so_name_(so_name), | |||||
| kernel_name_(kernel_name), | |||||
| node_def_(node_def), | |||||
| input_data_addrs_(input_data_addrs), | |||||
| output_data_addrs_(output_data_addrs) {} | |||||
| ~AicpuTaskInfo() override {} | |||||
| const std::string &so_name() const { return so_name_; } | |||||
| const std::string &kernel_name() const { return kernel_name_; } | |||||
| const std::string &node_def() const { return node_def_; } | |||||
| const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; } | |||||
| const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; } | |||||
| private: | |||||
| std::string so_name_; | |||||
| std::string kernel_name_; | |||||
| std::string node_def_; | |||||
| std::vector<void *> input_data_addrs_; | |||||
| std::vector<void *> output_data_addrs_; | |||||
| }; | |||||
| class LabelTaskInfo : public TaskInfo { | |||||
| public: | |||||
| uint32_t label_id() const { return label_id_; } | |||||
| protected: | |||||
| LabelTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t label_id) | |||||
| : TaskInfo(stream_id, type), label_id_(label_id) {} | |||||
| virtual ~LabelTaskInfo() override {} | |||||
| uint32_t label_id_; | |||||
| }; | |||||
| class LabelSetTaskInfo : public LabelTaskInfo { | |||||
| public: | |||||
| LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id) | |||||
| : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SET, label_id) {} | |||||
| ~LabelSetTaskInfo() override {} | |||||
| }; | |||||
| class LabelSwitchTaskInfo : public LabelTaskInfo { | |||||
| public: | |||||
| LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_id) | |||||
| : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SWITCH, label_id) {} | |||||
| ~LabelSwitchTaskInfo() override {} | |||||
| }; | |||||
| class LabelGotoTaskInfo : public LabelTaskInfo { | |||||
| public: | |||||
| LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id) | |||||
| : LabelTaskInfo(stream_id, TaskInfoType::LABEL_GOTO, label_id) {} | |||||
| ~LabelGotoTaskInfo() override {} | |||||
| }; | |||||
| class EventTaskInfo : public TaskInfo { | |||||
| public: | |||||
| uint32_t event_id() const { return event_id_; } | |||||
| protected: | |||||
| EventTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t event_id) | |||||
| : TaskInfo(stream_id, type), event_id_(event_id) {} | |||||
| virtual ~EventTaskInfo() override {} | |||||
| uint32_t event_id_; | |||||
| }; | |||||
| class EventRecordTaskInfo : public EventTaskInfo { | |||||
| public: | |||||
| EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id) | |||||
| : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {} | |||||
| ~EventRecordTaskInfo() override {} | |||||
| }; | |||||
| class EventWaitTaskInfo : public EventTaskInfo { | |||||
| public: | |||||
| EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id) | |||||
| : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {} | |||||
| ~EventWaitTaskInfo() override {} | |||||
| }; | |||||
| class FusionStartTaskInfo : public TaskInfo { | |||||
| public: | |||||
| explicit FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_START) {} | |||||
| ~FusionStartTaskInfo() override {} | |||||
| }; | |||||
| class FusionEndTaskInfo : public TaskInfo { | |||||
| public: | |||||
| explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {} | |||||
| ~FusionEndTaskInfo() override {} | |||||
| }; | |||||
| class HcclTaskInfo : public TaskInfo { | |||||
| public: | |||||
| HcclTaskInfo(uint32_t stream_id, const std::string hccl_type, void *input_data_addr, void *output_data_addr, | |||||
| void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, | |||||
| const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, | |||||
| int64_t op_type, int64_t data_type, std::function<bool(void *, void *)> hcom_bind_model, | |||||
| std::function<bool(void *)> hcom_unbind_model, | |||||
| std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task) | |||||
| : TaskInfo(stream_id, TaskInfoType::HCCL), | |||||
| hccl_type_(hccl_type), | |||||
| input_data_addr_(input_data_addr), | |||||
| output_data_addr_(output_data_addr), | |||||
| workspace_addr_(workspace_addr), | |||||
| workspace_size_(workspace_size), | |||||
| hccl_stream_num_(hccl_stream_num), | |||||
| private_def_(private_def), | |||||
| ops_kernel_store_(ops_kernel_store), | |||||
| count_(count), | |||||
| root_id_(root_id), | |||||
| op_type_(op_type), | |||||
| data_type_(data_type), | |||||
| hcom_bind_model_(hcom_bind_model), | |||||
| hcom_unbind_model_(hcom_unbind_model), | |||||
| hcom_distribute_task_(hcom_distribute_task) {} | |||||
| ~HcclTaskInfo() override {} | |||||
| const std::string &hccl_type() const { return hccl_type_; } | |||||
| void *input_data_addr() const { return input_data_addr_; } | |||||
| void *output_data_addr() const { return output_data_addr_; } | |||||
| void *workspace_addr() const { return workspace_addr_; } | |||||
| int64_t workspace_size() const { return workspace_size_; } | |||||
| int64_t hccl_stream_num() const { return hccl_stream_num_; } | |||||
| const std::vector<uint8_t> &private_def() const { return private_def_; } | |||||
| void *ops_kernel_store() const { return ops_kernel_store_; } | |||||
| int32_t count() const { return count_; } | |||||
| int64_t root_id() const { return root_id_; } | |||||
| int64_t op_type() const { return op_type_; } | |||||
| int64_t data_type() const { return data_type_; } | |||||
| std::function<bool(void *, void *)> hcom_bind_model() const { return hcom_bind_model_; } | |||||
| std::function<bool(void *)> hcom_unbind_model() const { return hcom_unbind_model_; } | |||||
| std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task() const { | |||||
| return hcom_distribute_task_; | |||||
| } | |||||
| private: | |||||
| std::string hccl_type_; | |||||
| void *input_data_addr_; | |||||
| void *output_data_addr_; | |||||
| void *workspace_addr_; | |||||
| int64_t workspace_size_; | |||||
| int64_t hccl_stream_num_; | |||||
| std::vector<uint8_t> private_def_; | |||||
| void *ops_kernel_store_; | |||||
| int32_t count_; | |||||
| int64_t root_id_; | |||||
| int64_t op_type_; | |||||
| int64_t data_type_; | |||||
| std::function<bool(void *, void *)> hcom_bind_model_; | |||||
| std::function<bool(void *)> hcom_unbind_model_; | |||||
| std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task_; | |||||
| }; | |||||
| class ProfilerTraceTaskInfo : public TaskInfo { | |||||
| public: | |||||
| ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) | |||||
| : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {} | |||||
| ~ProfilerTraceTaskInfo() override {} | |||||
| uint64_t log_id() const { return log_id_; } | |||||
| bool notify() const { return notify_; } | |||||
| uint32_t flat() const { return flat_; } | |||||
| private: | |||||
| uint64_t log_id_; | |||||
| bool notify_; | |||||
| uint32_t flat_; | |||||
| }; | |||||
| class MemcpyAsyncTaskInfo : public TaskInfo { | |||||
| public: | |||||
| MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind) | |||||
| : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC), | |||||
| dst_(dst), | |||||
| dst_max_(dst_max), | |||||
| src_(src), | |||||
| count_(count), | |||||
| kind_(kind) {} | |||||
| ~MemcpyAsyncTaskInfo() override {} | |||||
| void *dst() const { return dst_; } | |||||
| uint64_t dst_max() const { return dst_max_; } | |||||
| void *src() const { return src_; } | |||||
| uint64_t count() const { return count_; } | |||||
| uint32_t kind() const { return kind_; } | |||||
| private: | |||||
| void *dst_; | |||||
| uint64_t dst_max_; | |||||
| void *src_; | |||||
| uint64_t count_; | |||||
| int32_t kind_; | |||||
| }; | |||||
| class StreamSwitchTaskInfo : public TaskInfo { | |||||
| public: | |||||
| StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond, | |||||
| int64_t data_type) | |||||
| : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH), | |||||
| true_stream_id_(true_stream_id), | |||||
| input_addr_(input_addr), | |||||
| value_addr_(value_addr), | |||||
| cond_(cond), | |||||
| data_type_(data_type) {} | |||||
| ~StreamSwitchTaskInfo() override {} | |||||
| int64_t true_stream_id() const { return true_stream_id_; } | |||||
| void *input_addr() const { return input_addr_; } | |||||
| void *value_addr() const { return value_addr_; } | |||||
| int64_t cond() const { return cond_; } | |||||
| int64_t data_type() const { return data_type_; } | |||||
| private: | |||||
| int64_t true_stream_id_; | |||||
| void *input_addr_; | |||||
| void *value_addr_; | |||||
| int64_t cond_; | |||||
| int64_t data_type_; | |||||
| }; | |||||
| class StreamActiveTaskInfo : public TaskInfo { | |||||
| public: | |||||
| StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id) | |||||
| : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {} | |||||
| ~StreamActiveTaskInfo() override {} | |||||
| uint32_t active_stream_id() const { return active_stream_id_; } | |||||
| private: | |||||
| uint32_t active_stream_id_; | |||||
| }; | |||||
| } // namespace model_runner | |||||
| } // namespace ge | |||||
| #endif // INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ | |||||
| @@ -23,6 +23,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "framework/common/types.h" | #include "framework/common/types.h" | ||||
| #include "framework/omg/omg_inner_types.h" | #include "framework/omg/omg_inner_types.h" | ||||
| #include "framework/omg/parser/parser_inner_ctx.h" | |||||
| #include "proto/ge_ir.pb.h" | #include "proto/ge_ir.pb.h" | ||||
| #include "proto/om.pb.h" | #include "proto/om.pb.h" | ||||
| @@ -99,6 +100,11 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const | |||||
| Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | ||||
| std::vector<std::string> &output_nodes_name); | std::vector<std::string> &output_nodes_name); | ||||
| void UpdateOmgCtxWithParserCtx(); | |||||
| void UpdateParserCtxWithOmgCtx(); | |||||
| } // namespace ge | } // namespace ge | ||||
| namespace domi { | namespace domi { | ||||
| @@ -31,7 +31,7 @@ | |||||
| using domi::DOMI_TENSOR_ND; | using domi::DOMI_TENSOR_ND; | ||||
| using domi::DOMI_TENSOR_RESERVED; | using domi::DOMI_TENSOR_RESERVED; | ||||
| using domi::domiTensorFormat_t; | using domi::domiTensorFormat_t; | ||||
| using domi::FMK_TYPE_RESERVED; | |||||
| using domi::FRAMEWORK_RESERVED; | |||||
| using domi::FrameworkType; | using domi::FrameworkType; | ||||
| using std::map; | using std::map; | ||||
| using std::string; | using std::string; | ||||
| @@ -100,7 +100,7 @@ struct OmgContext { | |||||
| std::string ddk_version; | std::string ddk_version; | ||||
| // preferential format used by the entire network | // preferential format used by the entire network | ||||
| domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; | domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; | ||||
| domi::FrameworkType type = domi::FMK_TYPE_RESERVED; | |||||
| domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||||
| RunMode run_mode = ONLY_PRE_CHECK; | RunMode run_mode = ONLY_PRE_CHECK; | ||||
| bool train_flag = false; | bool train_flag = false; | ||||
| // whether to use FP16 high precision | // whether to use FP16 high precision | ||||
| @@ -80,6 +80,7 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A | |||||
| Vistor<NodePtr> GetOutputNodes() const; | Vistor<NodePtr> GetOutputNodes() const; | ||||
| NodePtr FindNode(const std::string &name) const; | NodePtr FindNode(const std::string &name) const; | ||||
| NodePtr FindFirstNodeMatchType(const std::string &name) const; | |||||
| // AddNode with NodePtr | // AddNode with NodePtr | ||||
| NodePtr AddNode(NodePtr node); | NodePtr AddNode(NodePtr node); | ||||
| NodePtr AddNode(OpDescPtr op); | NodePtr AddNode(OpDescPtr op); | ||||
| @@ -235,8 +236,6 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A | |||||
| std::vector<NodePtr> &stack); | std::vector<NodePtr> &stack); | ||||
| graphStatus BFSTopologicalSorting(std::vector<NodePtr> &node_vec, std::map<NodePtr, uint32_t> &map_in_edge_num, | graphStatus BFSTopologicalSorting(std::vector<NodePtr> &node_vec, std::map<NodePtr, uint32_t> &map_in_edge_num, | ||||
| std::deque<NodePtr> &stack); | std::deque<NodePtr> &stack); | ||||
| graphStatus BFSTopologicalSortingWithGroup(std::vector<NodePtr> &node_vec, | |||||
| std::map<NodePtr, uint32_t> &map_in_edge_num, std::deque<NodePtr> &stack); | |||||
| graphStatus CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num, | graphStatus CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num, | ||||
| std::map<string, NodePtr> &breadth_node_map); | std::map<string, NodePtr> &breadth_node_map); | ||||
| graphStatus TopologicalSortingGraph(); | graphStatus TopologicalSortingGraph(); | ||||
| @@ -136,6 +136,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP_CONV_OP; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP_CONV_OP; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS; | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; | ||||
| @@ -176,6 +179,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS; | |||||
| // to be deleted | // to be deleted | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | ||||
| @@ -102,6 +102,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc : public AttrH | |||||
| Format GetOriginFormat() const; | Format GetOriginFormat() const; | ||||
| void SetOriginFormat(Format originFormat); | void SetOriginFormat(Format originFormat); | ||||
| void SetName(const std::string &name); | |||||
| const std::string GetName() const; | |||||
| DataType GetDataType() const; | DataType GetDataType() const; | ||||
| void SetDataType(DataType dt); | void SetDataType(DataType dt); | ||||
| @@ -22,6 +22,7 @@ | |||||
| #include <map> | #include <map> | ||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include <list> | |||||
| #include "graph/anchor.h" | #include "graph/anchor.h" | ||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| #include "graph/compute_graph.h" | #include "graph/compute_graph.h" | ||||
| @@ -111,21 +112,25 @@ enum IOType { kIn, kOut }; | |||||
| struct NodeIndexIO { | struct NodeIndexIO { | ||||
| NodeIndexIO(ge::NodePtr node, uint32_t index, IOType io_type) | NodeIndexIO(ge::NodePtr node, uint32_t index, IOType io_type) | ||||
| : node(std::move(node)), index(index), io_type(io_type) {} | |||||
| : node_(std::move(node)), index_(index), io_type_(io_type) { | |||||
| if (node_ != nullptr) { | |||||
| value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_); | |||||
| } | |||||
| } | |||||
| NodeIndexIO(ge::NodePtr node, int index, IOType io_type) | NodeIndexIO(ge::NodePtr node, int index, IOType io_type) | ||||
| : node(std::move(node)), index(static_cast<uint32_t>(index)), io_type(io_type) {} | |||||
| : node_(std::move(node)), index_(static_cast<uint32_t>(index)), io_type_(io_type) { | |||||
| if (node_ != nullptr) { | |||||
| value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_); | |||||
| } | |||||
| } | |||||
| ~NodeIndexIO() {} | ~NodeIndexIO() {} | ||||
| NodePtr node = nullptr; | |||||
| uint32_t index = 0; | |||||
| IOType io_type = kOut; | |||||
| NodePtr node_ = nullptr; | |||||
| uint32_t index_ = 0; | |||||
| IOType io_type_ = kOut; | |||||
| std::string value_; | |||||
| std::string ToString() const { | |||||
| if ((node == nullptr) || (node->GetOwnerComputeGraph() == nullptr)) { | |||||
| return ""; | |||||
| } | |||||
| return node->GetName() + (io_type == kOut ? "_out_" : "_in_") + std::to_string(index); | |||||
| } | |||||
| std::string ToString() const { return value_; } | |||||
| }; | }; | ||||
| class GraphUtils { | class GraphUtils { | ||||
| @@ -310,7 +315,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus GetRefMapping(const ComputeGraphPtr &graph, | static graphStatus GetRefMapping(const ComputeGraphPtr &graph, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol); | std::map<std::string, std::string> &anchor_to_symbol); | ||||
| /// | /// | ||||
| @@ -340,7 +345,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus HandleInAnchorMapping(const NodePtr &node, | static graphStatus HandleInAnchorMapping(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol); | std::map<std::string, std::string> &anchor_to_symbol); | ||||
| /// | /// | ||||
| @@ -351,7 +356,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus HandleOutAnchorMapping(const NodePtr &node, | static graphStatus HandleOutAnchorMapping(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol); | std::map<std::string, std::string> &anchor_to_symbol); | ||||
| /// | /// | ||||
| @@ -362,7 +367,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus HandleSubgraphInput(const NodePtr &node, | static graphStatus HandleSubgraphInput(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol); | std::map<std::string, std::string> &anchor_to_symbol); | ||||
| /// | /// | ||||
| @@ -373,7 +378,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus HandleMergeInput(const NodePtr &node, | static graphStatus HandleMergeInput(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol); | std::map<std::string, std::string> &anchor_to_symbol); | ||||
| /// | /// | ||||
| @@ -384,7 +389,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus HandleSubgraphOutput(const NodePtr &node, | static graphStatus HandleSubgraphOutput(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol); | std::map<std::string, std::string> &anchor_to_symbol); | ||||
| /// | /// | ||||
| @@ -397,7 +402,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2, | static graphStatus UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol); | std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol); | ||||
| /// | /// | ||||
| @@ -409,7 +414,7 @@ class GraphUtils { | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| static graphStatus UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info, | static graphStatus UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol); | std::map<std::string, std::string> &anchor_to_symbol); | ||||
| /// | /// | ||||
| @@ -25,6 +25,7 @@ | |||||
| #include "graph/types.h" | #include "graph/types.h" | ||||
| #include "graph/usr_types.h" | #include "graph/usr_types.h" | ||||
| #include "register/register_types.h" | #include "register/register_types.h" | ||||
| #include "external/register/register_fmk_types.h" | |||||
| namespace ge { | namespace ge { | ||||
| class TypeUtils { | class TypeUtils { | ||||
| @@ -39,6 +40,7 @@ class TypeUtils { | |||||
| static Format SerialStringToFormat(const std::string &str); | static Format SerialStringToFormat(const std::string &str); | ||||
| static Format DataFormatToFormat(const std::string &str); | static Format DataFormatToFormat(const std::string &str); | ||||
| static Format DomiFormatToFormat(domi::domiTensorFormat_t domi_format); | static Format DomiFormatToFormat(domi::domiTensorFormat_t domi_format); | ||||
| static std::string FmkTypeToSerialString(domi::FrameworkType fmk_type); | |||||
| static graphStatus Usr2DefQuantizeFactorParams(const UsrQuantizeFactorParams &usr, QuantizeFactorParams &def); | static graphStatus Usr2DefQuantizeFactorParams(const UsrQuantizeFactorParams &usr, QuantizeFactorParams &def); | ||||
| static graphStatus Def2UsrQuantizeFactorParams(const QuantizeFactorParams &def, UsrQuantizeFactorParams &usr); | static graphStatus Def2UsrQuantizeFactorParams(const QuantizeFactorParams &def, UsrQuantizeFactorParams &usr); | ||||
| @@ -48,63 +48,6 @@ bool IsUseBFS() { | |||||
| } | } | ||||
| return false; | return false; | ||||
| } | } | ||||
| bool IsTailingOptimization() { | |||||
| string is_tailing_optimization_option; | |||||
| auto ret = GetContext().GetOption(ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, is_tailing_optimization_option); | |||||
| if (ret == GRAPH_SUCCESS) { | |||||
| GELOGI("Option ge.exec.isTailingOptimization is %s", is_tailing_optimization_option.c_str()); | |||||
| // "1" means it's True from frontend option | |||||
| return is_tailing_optimization_option == "1"; | |||||
| } | |||||
| GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default."); | |||||
| return false; | |||||
| } | |||||
| bool IsFusedNode(const NodePtr &node) { | |||||
| bool is_fused_node = false; | |||||
| AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_FLAG, is_fused_node); | |||||
| return is_fused_node; | |||||
| } | |||||
| string GetGroupId(const NodePtr &node) { | |||||
| string group_id; | |||||
| AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id); | |||||
| return group_id; | |||||
| } | |||||
| bool IsGroupEnd(const NodePtr &node) { | |||||
| if (GetGroupId(node).empty()) { | |||||
| return false; | |||||
| } | |||||
| if (node->GetOutDataNodesSize() == 0) { | |||||
| return true; | |||||
| } | |||||
| for (const auto &out_data_node : node->GetOutDataNodes()) { | |||||
| if (IsFusedNode(out_data_node)) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
// Routes each ready node from breadth_node_map to one of three containers
// used by the group-aware BFS sort:
//   - group_stack: node has a group attribute matching the active group (or
//     no group is active yet) — traversed with priority;
//   - stack_input: node belongs to a *different* group — deferred to the
//     front of the input list so it is revisited later;
//   - stack:       node has no group attribute — normal BFS traversal.
// Note current_group_id is taken by value: adopting a group here does not
// propagate back to the caller.
void SplitNodeToStack(const std::map<string, NodePtr> &breadth_node_map, string current_group_id,
                      std::vector<NodePtr> &stack_input, std::deque<NodePtr> &group_stack, std::deque<NodePtr> &stack) {
  for (const auto &name_node : breadth_node_map) {
    // group first
    string group_id;
    if (AttrUtils::GetStr(name_node.second->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id)) {
      GELOGI("current node %s, group id: %s , current group id %s", name_node.second->GetName().c_str(),
             group_id.c_str(), current_group_id.c_str());
      if (!current_group_id.empty() && group_id != current_group_id) {
        // Different group while one is active: defer the node.
        GELOGI("node go to input_stack back: %s", name_node.second->GetName().c_str());
        (void)stack_input.insert(stack_input.begin(), name_node.second);
      } else {
        // No active group yet, or same group: adopt it and prioritize.
        current_group_id = group_id;
        GELOGI("node go to group_stack: %s", name_node.second->GetName().c_str());
        (void)group_stack.push_front(name_node.second);
      }
      continue;
    }
    // Ungrouped node: plain BFS stack.
    GELOGI("node go to stack: %s ", name_node.second->GetName().c_str());
    (void)stack.push_front(name_node.second);
  }
}
| } // namespace | } // namespace | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::ComputeGraph(const std::string &name) | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::ComputeGraph(const std::string &name) | ||||
| @@ -193,6 +136,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::FindNode(co | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr | |||||
| ComputeGraph::FindFirstNodeMatchType(const std::string &name) const { | |||||
| for (const auto &node : nodes_) { | |||||
| if (node == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if (node->GetType() == name) { | |||||
| return node; | |||||
| } | |||||
| } | |||||
| return nullptr; | |||||
| } | |||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphAttrsAreEqual( | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphAttrsAreEqual( | ||||
| const ComputeGraph &r_graph) const { | const ComputeGraph &r_graph) const { | ||||
| // ProtoMsgOwner <::google::protobuf::Message> is temporarily ignored | // ProtoMsgOwner <::google::protobuf::Message> is temporarily ignored | ||||
| @@ -642,9 +598,9 @@ ComputeGraph::UpdateInputMapping(const std::map<uint32_t, uint32_t> &input_mappi | |||||
| /// | /// | ||||
| GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | ||||
| ComputeGraph::UpdateOutputMapping(const std::map<uint32_t, uint32_t> &output_mapping) { | ComputeGraph::UpdateOutputMapping(const std::map<uint32_t, uint32_t> &output_mapping) { | ||||
| NodePtr net_output = FindNode(NODE_NAME_NET_OUTPUT); | |||||
| NodePtr net_output = FindFirstNodeMatchType(NETOUTPUT); | |||||
| if (net_output == nullptr) { | if (net_output == nullptr) { | ||||
| GE_LOGE("UpdateOutputMapping failed: node %s not exist in graph.", NODE_NAME_NET_OUTPUT); | |||||
| GE_LOGE("UpdateOutputMapping failed: node type %s not exist in graph.", NETOUTPUT); | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| OpDescPtr op_desc = net_output->GetOpDesc(); | OpDescPtr op_desc = net_output->GetOpDesc(); | ||||
| @@ -799,65 +755,6 @@ graphStatus ComputeGraph::BFSTopologicalSorting(std::vector<NodePtr> &node_vec, | |||||
| return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
| } | } | ||||
// BFS topological sort that additionally keeps nodes of the same HCCL fusion
// group contiguous in the output: once a group becomes active, its members
// (queued on group_stack) are emitted before other ready nodes, and ready
// nodes of *other* groups are deferred to the front of stack_input until the
// active group ends (detected via IsGroupEnd).
// @param [out]    node_vec        resulting topological node order
// @param [in,out] map_in_edge_num remaining in-edge count per node
// @param [in,out] stack           working stack of ready ungrouped nodes
// @return GRAPH_SUCCESS on success, GRAPH_FAILED when initial sorting fails
graphStatus ComputeGraph::BFSTopologicalSortingWithGroup(std::vector<NodePtr> &node_vec,
                                                         std::map<NodePtr, uint32_t> &map_in_edge_num,
                                                         std::deque<NodePtr> &stack) {
  GELOGI("Runing_Bfs_Sort_With_Group");
  std::string current_group_id;
  std::vector<NodePtr> stack_input;
  std::deque<NodePtr> group_stack;
  std::deque<NodePtr> fused_node_stack;  // NOTE(review): declared but never used below
  std::map<string, NodePtr> breadth_node_map;
  // Record the number of non data nodes but no input nodes
  GE_CHK_BOOL_EXEC(SortNodes(stack_input, map_in_edge_num) == GRAPH_SUCCESS, return GRAPH_FAILED, "sort nodes failed");
  // Only data nodes here
  while (!stack_input.empty() || !stack.empty() || !group_stack.empty()) {
    NodePtr node = nullptr;
    if (!group_stack.empty()) {
      // Traversal node in group has priority
      node = group_stack.back();
      group_stack.pop_back();
    } else if (!stack.empty()) {
      node = stack.back();
      stack.pop_back();
    } else {
      node = stack_input.back();
      stack_input.pop_back();
    }
    // A fused node opens a new group (named after itself) when none is active.
    if (IsFusedNode(node) && current_group_id.empty()) {
      current_group_id = node->GetName();
    }
    if (GetGroupId(node).empty() || GetGroupId(node) == current_group_id) {
      // Ungrouped node, or member of the active group: emit it.
      node_vec.push_back(node);
      GE_CHECK_NOTNULL(node->GetOpDesc());
      GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str());
    } else {
      if (current_group_id.empty()) {
        // No active group yet: adopt this node's group and emit it.
        current_group_id = GetGroupId(node);
        node_vec.push_back(node);
        GE_CHECK_NOTNULL(node->GetOpDesc());
        GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str());
      } else {
        // Member of a different group: defer it and pick another node.
        GELOGI("current group id is %s ,node go to input_stack back: %s", current_group_id.c_str(),
               node->GetName().c_str());
        (void)stack_input.insert(stack_input.begin(), node);
        continue;
      }
    }
    // Collect this node's ready successors and route them to the three
    // working containers according to their group attribute.
    CollectBreadthOutNode(node, map_in_edge_num, breadth_node_map);
    SplitNodeToStack(breadth_node_map, current_group_id, stack_input, group_stack, stack);
    breadth_node_map.clear();
    // check the end of group
    if (IsGroupEnd(node)) {
      GELOGI("Current node %s is end of group %s.", node->GetName().c_str(), current_group_id.c_str());
      current_group_id = "";
    }
  }
  return GRAPH_SUCCESS;
}
| graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num, | graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num, | ||||
| std::map<string, NodePtr> &breadth_node_map) { | std::map<string, NodePtr> &breadth_node_map) { | ||||
| for (const auto &anchor : node->GetAllOutDataAnchors()) { | for (const auto &anchor : node->GetAllOutDataAnchors()) { | ||||
| @@ -907,7 +804,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::Topolog | |||||
| } | } | ||||
| std::vector<std::shared_ptr<ComputeGraph>> subgraphs; | std::vector<std::shared_ptr<ComputeGraph>> subgraphs; | ||||
| (void)AllGraphNodes(subgraphs); | |||||
| auto nodes = AllGraphNodes(subgraphs); | |||||
| for (size_t i = 0; i < nodes.size(); i++) { | |||||
| NodePtr node = nodes.at(i); // [node: should not be null] | |||||
| node->GetOpDesc()->SetId(i); // [node->GetOpDesc(): should not be null] | |||||
| } | |||||
| if (sub_graph_.size() != subgraphs.size()) { // Graph Partition use subgraph, Keep original | if (sub_graph_.size() != subgraphs.size()) { // Graph Partition use subgraph, Keep original | ||||
| GELOGW("Keep original subgraph for graph size %zu not equal %zu.", sub_graph_.size(), subgraphs.size()); | GELOGW("Keep original subgraph for graph size %zu not equal %zu.", sub_graph_.size(), subgraphs.size()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -920,17 +821,10 @@ graphStatus ComputeGraph::TopologicalSortingGraph() { | |||||
| std::vector<NodePtr> node_vec; | std::vector<NodePtr> node_vec; | ||||
| std::map<NodePtr, uint32_t> map_in_edge_num; | std::map<NodePtr, uint32_t> map_in_edge_num; | ||||
| bool use_BFS = IsUseBFS(); | bool use_BFS = IsUseBFS(); | ||||
| bool is_tailing_optimization = IsTailingOptimization(); | |||||
| if (use_BFS) { | if (use_BFS) { | ||||
| std::deque<NodePtr> stack; | std::deque<NodePtr> stack; | ||||
| if (is_tailing_optimization) { | |||||
| if (BFSTopologicalSortingWithGroup(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) { | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| } else { | |||||
| if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) { | |||||
| return GRAPH_FAILED; | |||||
| } | |||||
| if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) { | |||||
| return GRAPH_FAILED; | |||||
| } | } | ||||
| } else { | } else { | ||||
| std::vector<NodePtr> stack; | std::vector<NodePtr> stack; | ||||
| @@ -41,7 +41,7 @@ using namespace ge; | |||||
| using namespace std; | using namespace std; | ||||
| namespace ge { | namespace ge { | ||||
| namespace { | namespace { | ||||
| static const std::unordered_set<string> kChangeDimNodes = {RESHAPE, PERMUTE, EXPANDDIMS, SQUEEZE}; | |||||
| static const std::unordered_set<string> kChangeDimNodes = {PERMUTE, EXPANDDIMS, SQUEEZE}; | |||||
| static bool net_format_is_nd = true; | static bool net_format_is_nd = true; | ||||
| static Format g_user_set_format = FORMAT_ND; | static Format g_user_set_format = FORMAT_ND; | ||||
| static bool is_first_infer = true; | static bool is_first_infer = true; | ||||
| @@ -118,6 +118,9 @@ const std::string ATTR_NAME_NAN_OPT = "nan_opt"; | |||||
| const std::string ATTR_NAME_AIPP = "aipp"; | const std::string ATTR_NAME_AIPP = "aipp"; | ||||
| const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp"; | const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp"; | ||||
| const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; | |||||
| const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; | |||||
| const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; | const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; | ||||
| const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; | const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; | ||||
| @@ -150,6 +153,7 @@ const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG = "need_stream_cycle_event"; | |||||
| const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id"; | const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id"; | ||||
| const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; | const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; | ||||
| const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; | const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; | ||||
| const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | |||||
| // To be deleted | // To be deleted | ||||
| const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | ||||
| @@ -1000,7 +1004,7 @@ const std::string ATTR_NAME_FUSION_TYPE_LIST = "_fusion_type_list"; | |||||
| const std::string ATTR_NAME_VALID_INPUT_SHAPE_LIST_LIST = "_valid_input_shape_list_list"; | const std::string ATTR_NAME_VALID_INPUT_SHAPE_LIST_LIST = "_valid_input_shape_list_list"; | ||||
| const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_list_list"; | const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_list_list"; | ||||
| const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; | const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; | ||||
| const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; | |||||
| const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list"; | |||||
| // used for Horovod | // used for Horovod | ||||
| const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id"; | const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id"; | ||||
| @@ -1233,6 +1233,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc( | |||||
| GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed."); | GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed."); | ||||
| } | } | ||||
| } | } | ||||
| if (!op_desc->output_name_idx_.empty()) { | |||||
| op_desc->output_name_idx_.clear(); | |||||
| } | |||||
| return op_desc; | return op_desc; | ||||
| } | } | ||||
| @@ -464,6 +464,24 @@ void GeTensorDesc::SetFormat(Format format) { | |||||
| } | } | ||||
| } | } | ||||
| void GeTensorDesc::SetName(const std::string &name) { | |||||
| auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg(); | |||||
| if (tensor_descriptor_msg != nullptr) { | |||||
| tensor_descriptor_msg->set_name(name); | |||||
| return; | |||||
| } | |||||
| GELOGW("[SetName]tensor_descriptor_msg is null."); | |||||
| } | |||||
| const std::string GeTensorDesc::GetName() const { | |||||
| auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg(); | |||||
| if (tensor_descriptor_msg != nullptr) { | |||||
| return tensor_descriptor_msg->name(); | |||||
| } | |||||
| GELOGW("[GetName]tensor_descriptor_msg is null."); | |||||
| return ""; | |||||
| } | |||||
| Format GeTensorDesc::GetOriginFormat() const { | Format GeTensorDesc::GetOriginFormat() const { | ||||
| std::string origin_format_str; | std::string origin_format_str; | ||||
| if (!AttrUtils::GetStr(this, TENSOR_UTILS_ORIGIN_FORMAT, origin_format_str)) { | if (!AttrUtils::GetStr(this, TENSOR_UTILS_ORIGIN_FORMAT, origin_format_str)) { | ||||
| @@ -0,0 +1,182 @@ | |||||
| LOCAL_PATH := $(call my-dir) | |||||
| COMMON_LOCAL_SRC_FILES := \ | |||||
| ./proto/om.proto \ | |||||
| ./proto/ge_ir.proto \ | |||||
| ./proto/ge_onnx.proto \ | |||||
| ./proto/insert_op.proto \ | |||||
| ./proto/task.proto \ | |||||
| ./proto/fwk_adapter.proto \ | |||||
| ./proto/op_mapping_info.proto \ | |||||
| ./anchor.cc \ | |||||
| ./ge_attr_value.cc \ | |||||
| ./attr_value.cc \ | |||||
| ./buffer.cc \ | |||||
| ./compute_graph.cc \ | |||||
| ./graph.cc \ | |||||
| ./inference_context.cc \ | |||||
| ./shape_refiner.cc \ | |||||
| ./format_refiner.cc \ | |||||
| ./ref_relation.cc \ | |||||
| ./model.cc \ | |||||
| ./model_serialize.cc \ | |||||
| ./node.cc \ | |||||
| ./op_desc.cc \ | |||||
| ./operator.cc \ | |||||
| ./operator_factory.cc \ | |||||
| ./operator_factory_impl.cc \ | |||||
| ./ge_attr_define.cc \ | |||||
| ./ge_tensor.cc \ | |||||
| ./detail/attributes_holder.cc \ | |||||
| ./utils/anchor_utils.cc \ | |||||
| ./utils/graph_utils.cc \ | |||||
| ./utils/ge_ir_utils.cc \ | |||||
| ./utils/node_utils.cc \ | |||||
| ./utils/op_desc_utils.cc \ | |||||
| ./utils/type_utils.cc \ | |||||
| ./utils/tensor_utils.cc \ | |||||
| ./tensor.cc \ | |||||
| ./debug/graph_debug.cc \ | |||||
| ./opsproto/opsproto_manager.cc \ | |||||
| ../ops/op_imp.cpp \ | |||||
| option/ge_context.cc \ | |||||
| option/ge_local_context.cc \ | |||||
| ./runtime_inference_context.cc \ | |||||
| COMMON_LOCAL_C_INCLUDES := \ | |||||
| proto/om.proto \ | |||||
| proto/ge_ir.proto \ | |||||
| proto_inner/ge_onnx.proto \ | |||||
| proto/insert_op.proto \ | |||||
| proto/task.proto \ | |||||
| proto/fwk_adapter.proto \ | |||||
| proto/op_mapping_info.proto \ | |||||
| inc \ | |||||
| inc/external \ | |||||
| inc/external/graph \ | |||||
| inc/graph \ | |||||
| inc/common \ | |||||
| common \ | |||||
| common/graph \ | |||||
| third_party/protobuf/include \ | |||||
| libc_sec/include \ | |||||
| ops/built-in/op_proto/inc \ | |||||
| #compiler for host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libgraph | |||||
| LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 | |||||
| LOCAL_CPPFLAGS += -fexceptions | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libprotobuf \ | |||||
| libslog \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_MULTILIB := 64 | |||||
| LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_HOST_SHARED_LIBRARY) | |||||
| #compiler for device | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libgraph | |||||
| LOCAL_CFLAGS += -O2 | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libprotobuf \ | |||||
| libslog \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| ifeq ($(device_os),android) | |||||
| LOCAL_LDFLAGS := -ldl | |||||
| endif | |||||
| LOCAL_MULTILIB := 64 | |||||
| LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_SHARED_LIBRARY) | |||||
| # compile for ut/st | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libgraph | |||||
| LOCAL_CFLAGS += | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libprotobuf \ | |||||
| libslog \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_MULTILIB := 64 | |||||
| LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_LLT_SHARED_LIBRARY) | |||||
| #compiler for host static lib | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libgraph | |||||
| LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 | |||||
| LOCAL_CPPFLAGS += -fexceptions | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_STATIC_LIBRARIES := \ | |||||
| libprotobuf \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_MULTILIB := 64 | |||||
| LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_HOST_STATIC_LIBRARY) | |||||
| #compiler for device static lib | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libgraph | |||||
| LOCAL_CFLAGS += -O2 | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_STATIC_LIBRARIES := \ | |||||
| libprotobuf \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_MULTILIB := 64 | |||||
| LOCAL_PROPRIETARY_MODULE := true | |||||
| include $(BUILD_STATIC_LIBRARY) | |||||
| @@ -130,6 +130,16 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op | |||||
| for (const std::string &name : op_desc->GetSubgraphInstanceNames()) { | for (const std::string &name : op_desc->GetSubgraphInstanceNames()) { | ||||
| op_def_proto->add_subgraph_name(name); | op_def_proto->add_subgraph_name(name); | ||||
| } | } | ||||
| proto::AttrDef key; | |||||
| proto::AttrDef value; | |||||
| for (auto &item : op_desc->output_name_idx_) { | |||||
| key.mutable_list()->add_s(item.first); | |||||
| value.mutable_list()->add_i(item.second); | |||||
| } | |||||
| auto op_desc_attr = op_def_proto->mutable_attr(); | |||||
| op_desc_attr->insert({"_output_name_key", key}); | |||||
| op_desc_attr->insert({"_output_name_value", value}); | |||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -228,6 +238,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali | |||||
| } | } | ||||
| bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) { | bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) { | ||||
| std::vector<string> key; | |||||
| std::vector<uint32_t> value; | |||||
| if (op_def_proto.attr().count("_output_name_key") > 0) { | |||||
| auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list(); | |||||
| for (const auto &item_s : output_name_key_list.s()) { | |||||
| key.push_back(item_s); | |||||
| } | |||||
| auto op_desc_attr = op_def_proto.mutable_attr(); | |||||
| op_desc_attr->erase("_output_name_key"); | |||||
| } | |||||
| if (op_def_proto.attr().count("_output_name_value") > 0) { | |||||
| auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list(); | |||||
| for (const auto &item_i : output_name_value_list.i()) { | |||||
| value.push_back(static_cast<uint32_t>(item_i)); | |||||
| } | |||||
| auto op_desc_attr = op_def_proto.mutable_attr(); | |||||
| op_desc_attr->erase("_output_name_value"); | |||||
| } | |||||
| op_desc = std::shared_ptr<OpDesc>(new (std::nothrow) OpDesc(protobuf_owner_, &op_def_proto)); | op_desc = std::shared_ptr<OpDesc>(new (std::nothrow) OpDesc(protobuf_owner_, &op_def_proto)); | ||||
| GE_CHK_BOOL_EXEC(op_desc != nullptr, return false, "op_desc is nullptr."); | GE_CHK_BOOL_EXEC(op_desc != nullptr, return false, "op_desc is nullptr."); | ||||
| @@ -253,6 +282,16 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d | |||||
| op_desc->SetSubgraphInstanceName(graph_index++, name); | op_desc->SetSubgraphInstanceName(graph_index++, name); | ||||
| } | } | ||||
| if (key.size() != 0) { | |||||
| if (key.size() != value.size()) { | |||||
| GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size()); | |||||
| } else { | |||||
| for (uint32_t i = 0; i < key.size(); ++i) { | |||||
| op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key.at(i), value.at(i))); | |||||
| } | |||||
| } | |||||
| } | |||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -0,0 +1,3 @@ | |||||
| LOCAL_PATH := $(call my-dir) | |||||
| include $(LOCAL_PATH)/graph.mk | |||||
| @@ -589,6 +589,7 @@ GeTensorDesc TensorAdapter::TensorDesc2GeTensorDesc(const TensorDesc &tensor_des | |||||
| tensor_desc.GetDataType()); | tensor_desc.GetDataType()); | ||||
| ge_tensor_desc.SetOriginShape(GeShape(tensor_desc.GetOriginShape().GetDims())); | ge_tensor_desc.SetOriginShape(GeShape(tensor_desc.GetOriginShape().GetDims())); | ||||
| ge_tensor_desc.SetOriginFormat(tensor_desc.GetOriginFormat()); | ge_tensor_desc.SetOriginFormat(tensor_desc.GetOriginFormat()); | ||||
| ge_tensor_desc.SetName(tensor_desc.GetName()); | |||||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | std::vector<std::pair<int64_t, int64_t>> shape_range; | ||||
| auto status = tensor_desc.GetShapeRange(shape_range); | auto status = tensor_desc.GetShapeRange(shape_range); | ||||
| if (status != GRAPH_SUCCESS) { | if (status != GRAPH_SUCCESS) { | ||||
| @@ -613,6 +614,7 @@ TensorDesc TensorAdapter::GeTensorDesc2TensorDesc(const GeTensorDesc &ge_tensor_ | |||||
| ge_tensor_desc.GetDataType()); | ge_tensor_desc.GetDataType()); | ||||
| tensor_desc.SetOriginShape(Shape(ge_tensor_desc.GetOriginShape().GetDims())); | tensor_desc.SetOriginShape(Shape(ge_tensor_desc.GetOriginShape().GetDims())); | ||||
| tensor_desc.SetOriginFormat(ge_tensor_desc.GetOriginFormat()); | tensor_desc.SetOriginFormat(ge_tensor_desc.GetOriginFormat()); | ||||
| tensor_desc.SetName(ge_tensor_desc.GetName()); | |||||
| std::vector<std::pair<int64_t, int64_t>> shape_range; | std::vector<std::pair<int64_t, int64_t>> shape_range; | ||||
| auto status = ge_tensor_desc.GetShapeRange(shape_range); | auto status = ge_tensor_desc.GetShapeRange(shape_range); | ||||
| if (status != GRAPH_SUCCESS) { | if (status != GRAPH_SUCCESS) { | ||||
| @@ -1336,7 +1336,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraphPtr GraphUtils::FindR | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::GetRefMapping(const ComputeGraphPtr &graph, | graphStatus GraphUtils::GetRefMapping(const ComputeGraphPtr &graph, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol) { | std::map<std::string, std::string> &anchor_to_symbol) { | ||||
| GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
| for (auto &node : graph->GetAllNodes()) { | for (auto &node : graph->GetAllNodes()) { | ||||
| @@ -1384,7 +1384,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr GraphUtils::FindNodeFromA | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, | graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol) { | std::map<std::string, std::string> &anchor_to_symbol) { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| @@ -1402,7 +1402,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, | |||||
| } | } | ||||
| for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | ||||
| NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn); | |||||
| NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn); | |||||
| OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | ||||
| if (peer_out_anchor == nullptr) { | if (peer_out_anchor == nullptr) { | ||||
| std::string symbol = cur_node_info.ToString(); | std::string symbol = cur_node_info.ToString(); | ||||
| @@ -1410,7 +1410,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, | |||||
| symbol_to_anchors[symbol] = {cur_node_info}; | symbol_to_anchors[symbol] = {cur_node_info}; | ||||
| anchor_to_symbol[symbol] = symbol; | anchor_to_symbol[symbol] = symbol; | ||||
| } else { | } else { | ||||
| NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); | |||||
| NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); | |||||
| if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | ||||
| GE_LOGE("Update symbol mapping failed."); | GE_LOGE("Update symbol mapping failed."); | ||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| @@ -1429,18 +1429,18 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node, | graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol) { | std::map<std::string, std::string> &anchor_to_symbol) { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | ||||
| NodeIndexIO cur_node_info = NodeIndexIO(node, out_data_anchor->GetIdx(), kOut); | |||||
| NodeIndexIO cur_node_info(node, out_data_anchor->GetIdx(), kOut); | |||||
| if (anchor_to_symbol.find(cur_node_info.ToString()) != anchor_to_symbol.end()) { | if (anchor_to_symbol.find(cur_node_info.ToString()) != anchor_to_symbol.end()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| int32_t reuse_in_index = -1; | int32_t reuse_in_index = -1; | ||||
| if (IsRefFromInput(out_data_anchor, reuse_in_index)) { | if (IsRefFromInput(out_data_anchor, reuse_in_index)) { | ||||
| NodeIndexIO exist_node_info = NodeIndexIO(node, reuse_in_index, kIn); | |||||
| NodeIndexIO exist_node_info(node, reuse_in_index, kIn); | |||||
| if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | ||||
| GE_LOGE("Update symbol mapping failed."); | GE_LOGE("Update symbol mapping failed."); | ||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| @@ -1448,7 +1448,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node, | |||||
| } else { | } else { | ||||
| std::string symbol = cur_node_info.ToString(); | std::string symbol = cur_node_info.ToString(); | ||||
| GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str()); | GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str()); | ||||
| symbol_to_anchors.emplace(std::make_pair(symbol, std::vector<NodeIndexIO>{cur_node_info})); | |||||
| symbol_to_anchors.emplace(std::make_pair(symbol, std::list<NodeIndexIO>{cur_node_info})); | |||||
| anchor_to_symbol.emplace(std::make_pair(symbol, symbol)); | anchor_to_symbol.emplace(std::make_pair(symbol, symbol)); | ||||
| } | } | ||||
| } | } | ||||
| @@ -1464,7 +1464,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node, | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node, | graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol) { | std::map<std::string, std::string> &anchor_to_symbol) { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| @@ -1482,8 +1482,8 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node, | |||||
| OutDataAnchorPtr peer_out_anchor = parent_in_anchor->GetPeerOutAnchor(); | OutDataAnchorPtr peer_out_anchor = parent_in_anchor->GetPeerOutAnchor(); | ||||
| if (peer_out_anchor != nullptr) { | if (peer_out_anchor != nullptr) { | ||||
| // Data has and only has one input | // Data has and only has one input | ||||
| NodeIndexIO cur_node_info = NodeIndexIO(node, 0, kIn); | |||||
| NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); | |||||
| NodeIndexIO cur_node_info(node, 0, kIn); | |||||
| NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); | |||||
| if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | ||||
| GE_LOGE("Update symbol mapping failed."); | GE_LOGE("Update symbol mapping failed."); | ||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| @@ -1501,7 +1501,7 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node, | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::HandleMergeInput(const NodePtr &node, | graphStatus GraphUtils::HandleMergeInput(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol) { | std::map<std::string, std::string> &anchor_to_symbol) { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| std::vector<NodeIndexIO> exist_node_infos; | std::vector<NodeIndexIO> exist_node_infos; | ||||
| @@ -1574,7 +1574,7 @@ graphStatus GraphUtils::HandleMergeInput(const NodePtr &node, | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, | graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol) { | std::map<std::string, std::string> &anchor_to_symbol) { | ||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); | ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); | ||||
| @@ -1595,8 +1595,8 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, | |||||
| } | } | ||||
| GE_CHECK_NOTNULL(parent_node->GetOutDataAnchor(index)); | GE_CHECK_NOTNULL(parent_node->GetOutDataAnchor(index)); | ||||
| // Union symbol of peer_out_anchor & parent_out_anchor | // Union symbol of peer_out_anchor & parent_out_anchor | ||||
| NodeIndexIO peer_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); | |||||
| NodeIndexIO parent_node_info = NodeIndexIO(parent_node, index, kOut); | |||||
| NodeIndexIO peer_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); | |||||
| NodeIndexIO parent_node_info(parent_node, index, kOut); | |||||
| std::string symbol; | std::string symbol; | ||||
| if ((UnionSymbolMapping(peer_node_info, parent_node_info, symbol_to_anchors, anchor_to_symbol, symbol) != | if ((UnionSymbolMapping(peer_node_info, parent_node_info, symbol_to_anchors, anchor_to_symbol, symbol) != | ||||
| GRAPH_SUCCESS) || | GRAPH_SUCCESS) || | ||||
| @@ -1606,7 +1606,7 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn); | |||||
| NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn); | |||||
| GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str()); | GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str()); | ||||
| symbol_to_anchors[symbol].emplace_back(cur_node_info); | symbol_to_anchors[symbol].emplace_back(cur_node_info); | ||||
| anchor_to_symbol.emplace(std::make_pair(cur_node_info.ToString(), symbol)); | anchor_to_symbol.emplace(std::make_pair(cur_node_info.ToString(), symbol)); | ||||
| @@ -1625,7 +1625,7 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2, | graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol) { | std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol) { | ||||
| std::string symbol1 = anchor_to_symbol[exist_node_info1.ToString()]; | std::string symbol1 = anchor_to_symbol[exist_node_info1.ToString()]; | ||||
| std::string symbol2 = anchor_to_symbol[exist_node_info2.ToString()]; | std::string symbol2 = anchor_to_symbol[exist_node_info2.ToString()]; | ||||
| @@ -1675,7 +1675,7 @@ graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1, | |||||
| /// @return success: GRAPH_SUCESS | /// @return success: GRAPH_SUCESS | ||||
| /// | /// | ||||
| graphStatus GraphUtils::UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info, | graphStatus GraphUtils::UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info, | ||||
| std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors, | |||||
| std::map<std::string, std::string> &anchor_to_symbol) { | std::map<std::string, std::string> &anchor_to_symbol) { | ||||
| auto iter1 = anchor_to_symbol.find(exist_node_info.ToString()); | auto iter1 = anchor_to_symbol.find(exist_node_info.ToString()); | ||||
| if (iter1 == anchor_to_symbol.end()) { | if (iter1 == anchor_to_symbol.end()) { | ||||
| @@ -524,7 +524,6 @@ OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| GE_CHK_BOOL_EXEC(const_opdesc != nullptr, return nullptr, "const_opdesc is nullptr!"); | |||||
| CHECK_FALSE_EXEC(SetWeights(const_opdesc, tensor_ptr) == ge::GRAPH_SUCCESS, return nullptr); | CHECK_FALSE_EXEC(SetWeights(const_opdesc, tensor_ptr) == ge::GRAPH_SUCCESS, return nullptr); | ||||
| const_opdesc->SetType(CONSTANT); | const_opdesc->SetType(CONSTANT); | ||||
| @@ -273,6 +273,7 @@ static graphStatus CalcTensorElementCnt(const std::vector<int64_t> &dims, Format | |||||
| case FORMAT_FRACTAL_Z: | case FORMAT_FRACTAL_Z: | ||||
| graph_status = CalcElementCntOfFractalZ(dims, data_type, element_cnt); | graph_status = CalcElementCntOfFractalZ(dims, data_type, element_cnt); | ||||
| break; | break; | ||||
| case FORMAT_NC1HWC0_C04: | |||||
| case FORMAT_FRACTAL_NZ: | case FORMAT_FRACTAL_NZ: | ||||
| case FORMAT_FRACTAL_ZZ: | case FORMAT_FRACTAL_ZZ: | ||||
| case FORMAT_NDHWC: | case FORMAT_NDHWC: | ||||
| @@ -283,6 +284,7 @@ static graphStatus CalcTensorElementCnt(const std::vector<int64_t> &dims, Format | |||||
| case FORMAT_FRACTAL_Z_3D_TRANSPOSE: | case FORMAT_FRACTAL_Z_3D_TRANSPOSE: | ||||
| case FORMAT_NDC1HWC0: | case FORMAT_NDC1HWC0: | ||||
| case FORMAT_FRACTAL_Z_C04: | case FORMAT_FRACTAL_Z_C04: | ||||
| case FORMAT_FRACTAL_ZN_LSTM: | |||||
| graph_status = CalcElementCntByDims(dims, element_cnt); | graph_status = CalcElementCntByDims(dims, element_cnt); | ||||
| break; | break; | ||||
| default: | default: | ||||
| @@ -59,6 +59,7 @@ static const std::map<Format, std::string> kFormatToStringMap = { | |||||
| {FORMAT_CN, "CN"}, | {FORMAT_CN, "CN"}, | ||||
| {FORMAT_NC, "NC"}, | {FORMAT_NC, "NC"}, | ||||
| {FORMAT_FRACTAL_ZN_LSTM, "FRACTAL_ZN_LSTM"}, | {FORMAT_FRACTAL_ZN_LSTM, "FRACTAL_ZN_LSTM"}, | ||||
| {FORMAT_FRACTAL_Z_G, "FRACTAL_Z_G"}, | |||||
| {FORMAT_RESERVED, "FORMAT_RESERVED"}, | {FORMAT_RESERVED, "FORMAT_RESERVED"}, | ||||
| {FORMAT_ALL, "ALL"}}; | {FORMAT_ALL, "ALL"}}; | ||||
| @@ -98,8 +99,9 @@ static const std::unordered_set<std::string> kInternalFormat = {"NC1HWC0", | |||||
| "FRACTAL_NZ", | "FRACTAL_NZ", | ||||
| "NDC1HWC0", | "NDC1HWC0", | ||||
| "FORMAT_FRACTAL_Z_3D", | "FORMAT_FRACTAL_Z_3D", | ||||
| "FORMAT_FRACTAL_Z_3D_TRANSPOSE" | |||||
| "FORMAT_FRACTAL_ZN_LSTM"}; | |||||
| "FORMAT_FRACTAL_Z_3D_TRANSPOSE", | |||||
| "FORMAT_FRACTAL_ZN_LSTM", | |||||
| "FORMAT_FRACTAL_Z_G"}; | |||||
| static const std::map<std::string, Format> kDataFormatMap = { | static const std::map<std::string, Format> kDataFormatMap = { | ||||
| {"NCHW", FORMAT_NCHW}, {"NHWC", FORMAT_NHWC}, {"NDHWC", FORMAT_NDHWC}, {"NCDHW", FORMAT_NCDHW}, {"ND", FORMAT_ND}}; | {"NCHW", FORMAT_NCHW}, {"NHWC", FORMAT_NHWC}, {"NDHWC", FORMAT_NDHWC}, {"NCDHW", FORMAT_NCDHW}, {"ND", FORMAT_ND}}; | ||||
| @@ -143,6 +145,7 @@ static const std::map<std::string, Format> kStringToFormatMap = { | |||||
| {"CN", FORMAT_CN}, | {"CN", FORMAT_CN}, | ||||
| {"NC", FORMAT_NC}, | {"NC", FORMAT_NC}, | ||||
| {"FRACTAL_ZN_LSTM", FORMAT_FRACTAL_ZN_LSTM}, | {"FRACTAL_ZN_LSTM", FORMAT_FRACTAL_ZN_LSTM}, | ||||
| {"FRACTAL_Z_G", FORMAT_FRACTAL_Z_G}, | |||||
| {"FORMAT_RESERVED", FORMAT_RESERVED}, | {"FORMAT_RESERVED", FORMAT_RESERVED}, | ||||
| {"ALL", FORMAT_ALL}}; | {"ALL", FORMAT_ALL}}; | ||||
| @@ -235,6 +238,11 @@ static const std::map<ge::DataType, uint32_t> kDataTypeToLength = { | |||||
| {DT_RESOURCE, sizeof(uint64_t)}, | {DT_RESOURCE, sizeof(uint64_t)}, | ||||
| }; | }; | ||||
| static const std::map<domi::FrameworkType, std::string> kFmkTypeToString = { | |||||
| {domi::CAFFE, "caffe"}, {domi::MINDSPORE, "mindspore"}, {domi::TENSORFLOW, "tensorflow"}, | |||||
| {domi::ANDROID_NN, "android_nn"}, {domi::ONNX, "onnx"}, {domi::FRAMEWORK_RESERVED, "framework_reserved"}, | |||||
| }; | |||||
| bool TypeUtils::IsDataTypeValid(DataType dt) { | bool TypeUtils::IsDataTypeValid(DataType dt) { | ||||
| uint32_t num = static_cast<uint32_t>(dt); | uint32_t num = static_cast<uint32_t>(dt); | ||||
| GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid"); | GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid"); | ||||
| @@ -312,6 +320,16 @@ Format TypeUtils::DomiFormatToFormat(domi::domiTensorFormat_t domi_format) { | |||||
| return FORMAT_RESERVED; | return FORMAT_RESERVED; | ||||
| } | } | ||||
| std::string TypeUtils::FmkTypeToSerialString(domi::FrameworkType fmk_type) { | |||||
| auto it = kFmkTypeToString.find(fmk_type); | |||||
| if (it != kFmkTypeToString.end()) { | |||||
| return it->second; | |||||
| } else { | |||||
| GELOGW("Framework type not support %d.", fmk_type); | |||||
| return ""; | |||||
| } | |||||
| } | |||||
| static inline void CopyDataFromBuffer(vector<uint8_t> &data, const Buffer &buffer) { | static inline void CopyDataFromBuffer(vector<uint8_t> &data, const Buffer &buffer) { | ||||
| data.clear(); | data.clear(); | ||||
| if (buffer.GetData() != nullptr && buffer.GetSize() != 0) { | if (buffer.GetData() != nullptr && buffer.GetSize() != 0) { | ||||
| @@ -45,7 +45,7 @@ include_directories(${GE_SOURCE_DIR}/inc/external) | |||||
| include_directories(${GE_SOURCE_DIR}/inc/external/graph) | include_directories(${GE_SOURCE_DIR}/inc/external/graph) | ||||
| include_directories(${GE_SOURCE_DIR}/inc/framework) | include_directories(${GE_SOURCE_DIR}/inc/framework) | ||||
| include_directories(${GE_SOURCE_DIR}/inc/framework/common) | include_directories(${GE_SOURCE_DIR}/inc/framework/common) | ||||
| include_directories(${GE_SOURCE_DIR}/inc/runtime) | |||||
| include_directories(${GE_SOURCE_DIR}/inc/graph) | |||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) | ||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) | ||||
| include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) | ||||
| @@ -108,6 +108,10 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "graph/partition/engine_place.cc" | "graph/partition/engine_place.cc" | ||||
| "graph/partition/graph_partition.cc" | "graph/partition/graph_partition.cc" | ||||
| "graph/passes/*.cc" | "graph/passes/*.cc" | ||||
| "graph/preprocess/graph_preprocess.cc" | |||||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | |||||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | |||||
| "graph/preprocess/multi_batch_copy_graph.cc" | |||||
| "host_kernels/add_kernel.cc" | "host_kernels/add_kernel.cc" | ||||
| "host_kernels/broadcast_args_kernel.cc" | "host_kernels/broadcast_args_kernel.cc" | ||||
| "host_kernels/broadcast_gradient_args_kernel.cc" | "host_kernels/broadcast_gradient_args_kernel.cc" | ||||
| @@ -144,10 +148,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "host_kernels/transdata_kernel.cc" | "host_kernels/transdata_kernel.cc" | ||||
| "host_kernels/transpose_kernel.cc" | "host_kernels/transpose_kernel.cc" | ||||
| "host_kernels/unpack_kernel.cc" | "host_kernels/unpack_kernel.cc" | ||||
| "graph/preprocess/graph_preprocess.cc" | |||||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | |||||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | |||||
| "graph/preprocess/multi_batch_copy_graph.cc" | |||||
| "host_kernels/unsqueeze_kernel.cc" | |||||
| "hybrid/common/npu_memory_allocator.cc" | "hybrid/common/npu_memory_allocator.cc" | ||||
| "hybrid/common/tensor_value.cc" | "hybrid/common/tensor_value.cc" | ||||
| "hybrid/executor/*.cc" | "hybrid/executor/*.cc" | ||||
| @@ -155,6 +156,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "hybrid/hybrid_davinci_model.cc" | "hybrid/hybrid_davinci_model.cc" | ||||
| "hybrid/model/*.cc" | "hybrid/model/*.cc" | ||||
| "hybrid/node_executor/aicore/*.cc" | "hybrid/node_executor/aicore/*.cc" | ||||
| "hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||||
| "hybrid/node_executor/aicpu/aicpu_node_executor.cc" | "hybrid/node_executor/aicpu/aicpu_node_executor.cc" | ||||
| "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | ||||
| "hybrid/node_executor/hostcpu/ge_local_node_executor.cc" | "hybrid/node_executor/hostcpu/ge_local_node_executor.cc" | ||||
| @@ -246,6 +248,10 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "graph/partition/engine_place.cc" | "graph/partition/engine_place.cc" | ||||
| "graph/partition/graph_partition.cc" | "graph/partition/graph_partition.cc" | ||||
| "graph/passes/*.cc" | "graph/passes/*.cc" | ||||
| "graph/preprocess/graph_preprocess.cc" | |||||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | |||||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | |||||
| "graph/preprocess/multi_batch_copy_graph.cc" | |||||
| "host_kernels/add_kernel.cc" | "host_kernels/add_kernel.cc" | ||||
| "host_kernels/broadcast_args_kernel.cc" | "host_kernels/broadcast_args_kernel.cc" | ||||
| "host_kernels/broadcast_gradient_args_kernel.cc" | "host_kernels/broadcast_gradient_args_kernel.cc" | ||||
| @@ -282,11 +288,8 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "host_kernels/transdata_kernel.cc" | "host_kernels/transdata_kernel.cc" | ||||
| "host_kernels/transpose_kernel.cc" | "host_kernels/transpose_kernel.cc" | ||||
| "host_kernels/unpack_kernel.cc" | "host_kernels/unpack_kernel.cc" | ||||
| "host_kernels/unsqueeze_kernel.cc" | |||||
| "hybrid/hybrid_davinci_model_stub.cc" | "hybrid/hybrid_davinci_model_stub.cc" | ||||
| "graph/preprocess/graph_preprocess.cc" | |||||
| "graph/preprocess/insert_op/ge_aipp_op.cc" | |||||
| "graph/preprocess/insert_op/util_insert_aipp_op.cc" | |||||
| "graph/preprocess/multi_batch_copy_graph.cc" | |||||
| "init/gelib.cc" | "init/gelib.cc" | ||||
| "ir_build/atc_ir_common.cc" | "ir_build/atc_ir_common.cc" | ||||
| "ir_build/ge_ir_build.cc" | "ir_build/ge_ir_build.cc" | ||||
| @@ -29,6 +29,7 @@ | |||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| #include "graph/manager/util/rt_context_util.h" | #include "graph/manager/util/rt_context_util.h" | ||||
| #include "register/op_registry.h" | #include "register/op_registry.h" | ||||
| #include "common/ge/tbe_plugin_manager.h" | |||||
| using domi::GetContext; | using domi::GetContext; | ||||
| using domi::OpRegistry; | using domi::OpRegistry; | ||||
| @@ -132,6 +133,9 @@ Status GEInitialize(const std::map<string, string> &options) { | |||||
| } | } | ||||
| GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid"); | GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid"); | ||||
| GE_TIMESTAMP_START(InitPreparation); | |||||
| TBEPluginManager::Instance().InitPreparation(options); | |||||
| GE_TIMESTAMP_END(InitPreparation, "GEInitialize::InitPreparation"); | |||||
| // call Initialize | // call Initialize | ||||
| GELOGT(TRACE_RUNNING, "Initializing environment"); | GELOGT(TRACE_RUNNING, "Initializing environment"); | ||||
| GE_TIMESTAMP_START(GELibInitialize); | GE_TIMESTAMP_START(GELibInitialize); | ||||
| @@ -178,6 +182,10 @@ Status GEFinalize() { | |||||
| ret = middle_ret; | ret = middle_ret; | ||||
| } | } | ||||
| } | } | ||||
| middle_ret = TBEPluginManager::Instance().Finalize(); | |||||
| if (middle_ret != SUCCESS) { | |||||
| ret = middle_ret; | |||||
| } | |||||
| if (kGeInitialized && ret == SUCCESS) { | if (kGeInitialized && ret == SUCCESS) { | ||||
| // Unified destruct rt_context | // Unified destruct rt_context | ||||
| @@ -262,10 +270,10 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { | |||||
| } | } | ||||
| Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) { | Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) { | ||||
| GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, sessinon_id: %lu.", graph_id, sessionId_); | |||||
| GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); | |||||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | ||||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | ||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Sesson."); | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GELOGD("Adding graph to session"); | GELOGD("Adding graph to session"); | ||||
| @@ -340,7 +348,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) { | |||||
| GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast<double *>(outputs[0].GetData()) + i)); | GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast<double *>(outputs[0].GetData()) + i)); | ||||
| break; | break; | ||||
| default: | default: | ||||
| GELOGI("Output datatype %s is not support print.", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
| GELOGI("Output datatype %s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
| return; | return; | ||||
| } | } | ||||
| } | } | ||||
| @@ -378,6 +386,21 @@ Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc | |||||
| return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback); | return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback); | ||||
| } | } | ||||
| Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) { | |||||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||||
| if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); | |||||
| return FAILED; | |||||
| } | |||||
| GELOGT(TRACE_RUNNING, "Building Graph"); | |||||
| Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Session BuildGraph failed"); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs, | Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs, | ||||
| RunAsyncCallback callback) { | RunAsyncCallback callback) { | ||||
| std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | ||||
| @@ -0,0 +1,111 @@ | |||||
| LOCAL_PATH := $(call my-dir) | |||||
| COMMON_LOCAL_SRC_FILES := \ | |||||
| proto/ge_api.proto \ | |||||
| ge_api.cc \ | |||||
| COMMON_LOCAL_C_INCLUDES := \ | |||||
| proto/ge_ir.proto \ | |||||
| proto/task.proto \ | |||||
| proto/om.proto \ | |||||
| proto/insert_op.proto \ | |||||
| $(LOCAL_PATH) ./ \ | |||||
| $(LOCAL_PATH)/../ \ | |||||
| $(LOCAL_PATH)/../../ \ | |||||
| $(TOPDIR)inc \ | |||||
| $(TOPDIR)inc/external \ | |||||
| $(TOPDIR)inc/external/graph \ | |||||
| $(TOPDIR)inc/common \ | |||||
| $(TOPDIR)inc/framework \ | |||||
| $(TOPDIR)inc/graph \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| $(TOPDIR)ops/built-in/op_proto/inc \ | |||||
| third_party/json/include \ | |||||
| third_party/protobuf/include \ | |||||
| third_party/opencv/include \ | |||||
| DEVICE_LOCAL_C_INCLUDES := \ | |||||
| proto/ge_ir.proto \ | |||||
| proto/task.proto \ | |||||
| proto/om.proto \ | |||||
| proto/insert_op.proto \ | |||||
| $(LOCAL_PATH) ./ \ | |||||
| $(LOCAL_PATH)/../ \ | |||||
| $(LOCAL_PATH)/../../ \ | |||||
| $(TOPDIR)inc \ | |||||
| $(TOPDIR)inc/external \ | |||||
| $(TOPDIR)inc/external/graph \ | |||||
| $(TOPDIR)inc/framework \ | |||||
| $(TOPDIR)inc/common \ | |||||
| $(TOPDIR)inc/graph \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| $(TOPDIR)ops/built-in/op_proto/inc \ | |||||
| third_party/json/include \ | |||||
| third_party/protobuf/include \ | |||||
| third_party/opencv/include \ | |||||
| #compiler for host infer | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_client | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 | |||||
| ifeq ($(DEBUG), 1) | |||||
| LOCAL_CFLAGS += -g -O0 | |||||
| endif | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libprotobuf \ | |||||
| libslog \ | |||||
| libmmpa \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| libge_compiler \ | |||||
| libge_common \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_SHARED_LIBRARIES += \ | |||||
| libruntime \ | |||||
| include $(BUILD_HOST_SHARED_LIBRARY) | |||||
| #compiler for device | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_client | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||||
| LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1 | |||||
| LOCAL_MODULE_CLASS := SHARED_LIBRARIES | |||||
| LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libprotobuf \ | |||||
| libslog \ | |||||
| libmmpa \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| libruntime \ | |||||
| libge_compiler \ | |||||
| libge_common \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_CFLAGS += \ | |||||
| -Wall | |||||
| include $(BUILD_SHARED_LIBRARY) | |||||
| @@ -40,9 +40,8 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { | |||||
| } | } | ||||
| char real_path[PATH_MAX] = {0}; | char real_path[PATH_MAX] = {0}; | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_path.length() >= PATH_MAX, return FAILED, "File path is longer than PATH_MAX!"); | |||||
| GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr, | GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr, | ||||
| GELOGI("File %s is not exit, it will be created.", file_path.c_str())); | |||||
| GELOGI("File %s is not exist, it will be created.", file_path.c_str())); | |||||
| // Open file | // Open file | ||||
| mode_t mode = S_IRUSR | S_IWUSR; | mode_t mode = S_IRUSR | S_IWUSR; | ||||
| fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | ||||
| @@ -50,13 +50,13 @@ PluginManager::~PluginManager() { ClearHandles_(); } | |||||
| string PluginManager::GetPath() { | string PluginManager::GetPath() { | ||||
| Dl_info dl_info; | Dl_info dl_info; | ||||
| if (dladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) == 0) { | if (dladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) == 0) { | ||||
| GELOGW("Failed to read so_path!"); | |||||
| GELOGW("Failed to read the shared library file path!"); | |||||
| return string(); | return string(); | ||||
| } else { | } else { | ||||
| std::string so_path = dl_info.dli_fname; | std::string so_path = dl_info.dli_fname; | ||||
| char path[PATH_MAX] = {0}; | char path[PATH_MAX] = {0}; | ||||
| if (so_path.length() >= PATH_MAX) { | if (so_path.length() >= PATH_MAX) { | ||||
| GELOGW("File path is too long!"); | |||||
| GELOGW("The shared library file path is too long!"); | |||||
| return string(); | return string(); | ||||
| } | } | ||||
| if (realpath(so_path.c_str(), path) == nullptr) { | if (realpath(so_path.c_str(), path) == nullptr) { | ||||
| @@ -93,11 +93,15 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
| std::vector<std::string> path_vec; | std::vector<std::string> path_vec; | ||||
| SplitPath(path, path_vec); | SplitPath(path, path_vec); | ||||
| for (const auto &single_path : path_vec) { | for (const auto &single_path : path_vec) { | ||||
| GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, "File path is too long!"); | |||||
| GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, | |||||
| GELOGE(GE_PLGMGR_PATH_INVALID, "The shared library file path is too long!"); | |||||
| continue); | continue); | ||||
| // load break when number of loaded so reach maximum | // load break when number of loaded so reach maximum | ||||
| if (num_of_loaded_so >= kMaxNumOfSo) { | if (num_of_loaded_so >= kMaxNumOfSo) { | ||||
| GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo); | |||||
| GELOGW( | |||||
| "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," | |||||
| " and only the first %d shared libraries will be loaded.", | |||||
| kMaxNumOfSo); | |||||
| break; | break; | ||||
| } | } | ||||
| @@ -110,11 +114,11 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
| int64_t file_size = 0; | int64_t file_size = 0; | ||||
| if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) { | if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) { | ||||
| GELOGW("Failed to validate so %s", file_path_dlopen.c_str()); | |||||
| GELOGW("Failed to validate the shared library: %s", file_path_dlopen.c_str()); | |||||
| continue; | continue; | ||||
| } | } | ||||
| GELOGI("dlopen so path name: %s. ", file_path_dlopen.c_str()); | |||||
| GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str()); | |||||
| // load continue when dlopen is failed | // load continue when dlopen is failed | ||||
| auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL); | auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL); | ||||
| @@ -128,14 +132,14 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
| for (const auto &func_name : func_check_list) { | for (const auto &func_name : func_check_list) { | ||||
| auto real_fn = (void (*)())dlsym(handle, func_name.c_str()); | auto real_fn = (void (*)())dlsym(handle, func_name.c_str()); | ||||
| if (real_fn == nullptr) { | if (real_fn == nullptr) { | ||||
| GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not exist!", func_name.c_str(), | |||||
| GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), | |||||
| func_name.c_str()); | func_name.c_str()); | ||||
| is_valid = false; | is_valid = false; | ||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| if (!is_valid) { | if (!is_valid) { | ||||
| GE_LOGE_IF(dlclose(handle), "Failed to dlclose ret"); | |||||
| GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -146,13 +150,13 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
| num_of_loaded_so++; | num_of_loaded_so++; | ||||
| } | } | ||||
| GELOGI("load so total num %u", num_of_loaded_so); | |||||
| GELOGI("The total number of shared libraries loaded: %u", num_of_loaded_so); | |||||
| for (auto name : so_list_) { | for (auto name : so_list_) { | ||||
| GELOGI("load %s successfully", name.c_str()); | |||||
| GELOGI("load shared library %s successfully", name.c_str()); | |||||
| } | } | ||||
| if (num_of_loaded_so == 0) { | if (num_of_loaded_so == 0) { | ||||
| GELOGW("Failed to find any valid so in path %s!", path.c_str()); | |||||
| GELOGW("No loadable shared library found in the path: %s", path.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -163,7 +167,7 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded | |||||
| // read file size | // read file size | ||||
| struct stat stat_buf; | struct stat stat_buf; | ||||
| if (stat(file_path.c_str(), &stat_buf) != 0) { | if (stat(file_path.c_str(), &stat_buf) != 0) { | ||||
| GELOGW("%s check fail.", file_path.c_str()); | |||||
| GELOGW("The shared library file check failed: %s", file_path.c_str()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -178,8 +182,8 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded | |||||
| // load continue if the total size of so reaches maximum when it is loaded | // load continue if the total size of so reaches maximum when it is loaded | ||||
| if (size_of_loaded_so + file_size > kMaxSizeOfLoadedSo) { | if (size_of_loaded_so + file_size > kMaxSizeOfLoadedSo) { | ||||
| GELOGW( | GELOGW( | ||||
| "%s is skipped because the size of loaded so reaches maximum if it is load! " | |||||
| "(size: %ldB, size of loaded so: %ldB, maximum: %dB)", | |||||
| "%s is skipped because the size of loaded share library reaches maximum if it is loaded! " | |||||
| "(size: %ldB, size of loaded share library: %ldB, maximum: %dB)", | |||||
| file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo); | file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -227,7 +231,10 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| // load break when number of loaded so reach maximum | // load break when number of loaded so reach maximum | ||||
| if (num_of_loaded_so >= kMaxNumOfSo) { | if (num_of_loaded_so >= kMaxNumOfSo) { | ||||
| GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo); | |||||
| GELOGW( | |||||
| "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," | |||||
| " and only the first %d shared libraries will be loaded.", | |||||
| kMaxNumOfSo); | |||||
| break; | break; | ||||
| } | } | ||||
| @@ -240,7 +247,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| int64_t file_size = 0; | int64_t file_size = 0; | ||||
| if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) { | if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) { | ||||
| GELOGW("Failed to validate so %s", canonical_path_str.c_str()); | |||||
| GELOGW("Failed to validate the shared library: %s", canonical_path_str.c_str()); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -266,8 +273,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| } | } | ||||
| } | } | ||||
| if (!is_valid) { | if (!is_valid) { | ||||
| GE_LOGE_IF(dlclose(handle), "Dlclose ret fail"); | |||||
| GELOGW("Dlclose ret fail!"); | |||||
| GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); | |||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -279,7 +285,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||||
| } | } | ||||
| closedir(dir); | closedir(dir); | ||||
| if (num_of_loaded_so == 0) { | if (num_of_loaded_so == 0) { | ||||
| GELOGW("Failed to find any valid so under %s!", path.c_str()); | |||||
| GELOGW("No loadable shared library found in the path: %s", path.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -0,0 +1,293 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/ge/tbe_plugin_manager.h" | |||||
| #include <dirent.h> | |||||
| #include <unistd.h> | |||||
| #include <algorithm> | |||||
| #include <cstring> | |||||
| #include <fstream> | |||||
| #include <iostream> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "common/ge/ge_util.h" | |||||
| #include "framework/common/debug/log.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "framework/common/ge_inner_error_codes.h" | |||||
| #include "framework/engine/dnnengine.h" | |||||
| #include "framework/omg/omg_inner_types.h" | |||||
| #include "external/ge/ge_api_types.h" | |||||
| #include "register/op_registry.h" | |||||
| #include "graph/opsproto_manager.h" | |||||
| #include "graph/utils/type_utils.h" | |||||
| namespace ge { | |||||
| std::map<string, string> TBEPluginManager::options_ = {}; | |||||
| // Get Singleton Instance | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginManager::Instance() { | |||||
| static TBEPluginManager instance_ptr_; | |||||
| return instance_ptr_; | |||||
| } | |||||
| Status TBEPluginManager::ClearHandles_() { | |||||
| Status ret = SUCCESS; | |||||
| for (const auto &handle : handles_vec_) { | |||||
| if (dlclose(handle) != 0) { | |||||
| ret = FAILED; | |||||
| GELOGW("Failed to close handle: %s", dlerror()); | |||||
| } | |||||
| } | |||||
| handles_vec_.clear(); | |||||
| return ret; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status TBEPluginManager::Finalize() { | |||||
| Status ret = ClearHandles_(); | |||||
| return ret; | |||||
| } | |||||
| string TBEPluginManager::GetPath() { | |||||
| Dl_info dl_info; | |||||
| if (dladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) == 0) { | |||||
| GELOGW("Failed to read so path!"); | |||||
| return string(); | |||||
| } else { | |||||
| string so_path = dl_info.dli_fname; | |||||
| char path[PATH_MAX] = {0}; | |||||
| if (so_path.length() >= PATH_MAX) { | |||||
| GELOGW("File path is too long!"); | |||||
| return string(); | |||||
| } | |||||
| if (realpath(so_path.c_str(), path) == nullptr) { | |||||
| GELOGW("Failed to get realpath of %s", so_path.c_str()); | |||||
| return string(); | |||||
| } | |||||
| so_path = path; | |||||
| so_path = so_path.substr(0, so_path.rfind('/') + 1); | |||||
| return so_path; | |||||
| } | |||||
| } | |||||
| void TBEPluginManager::ProcessSoFullName(vector<string> &file_list, string &caffe_parser_path, string &full_name, | |||||
| const string &caffe_parser_so_suff, const string &aicpu_so_suff, | |||||
| const string &aicpu_host_so_suff) { | |||||
| if (full_name.size() >= caffe_parser_so_suff.size() && | |||||
| full_name.compare(full_name.size() - caffe_parser_so_suff.size(), caffe_parser_so_suff.size(), | |||||
| caffe_parser_so_suff) == 0) { | |||||
| caffe_parser_path = full_name; | |||||
| } else if ((full_name.size() >= aicpu_so_suff.size() && | |||||
| full_name.compare(full_name.size() - aicpu_so_suff.size(), aicpu_so_suff.size(), aicpu_so_suff) == 0) || | |||||
| (full_name.size() >= aicpu_host_so_suff.size() && | |||||
| full_name.compare(full_name.size() - aicpu_host_so_suff.size(), aicpu_host_so_suff.size(), | |||||
| aicpu_host_so_suff) == 0)) { | |||||
| // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage. | |||||
| domi::GetContext().aicpu_op_run_paths.push_back(full_name); | |||||
| } else { | |||||
| // Save parser so path into file_list vector | |||||
| file_list.push_back(full_name); | |||||
| } | |||||
| } | |||||
| void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path) { | |||||
| // Path, change to absolute path | |||||
| string real_path = RealPath(path.c_str()); | |||||
| // Plugin path does not exist | |||||
| if (real_path.empty()) { | |||||
| GELOGW("RealPath is empty."); | |||||
| return; | |||||
| } | |||||
| struct stat stat_buf; | |||||
| if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) { | |||||
| GELOGW("%s is not a dir.", real_path.c_str()); | |||||
| return; | |||||
| } | |||||
| struct dirent *dent(0); | |||||
| DIR *dir = opendir(real_path.c_str()); | |||||
| // Plugin path does not exist | |||||
| if (dir == nullptr) { | |||||
| GELOGW("Open directory %s failed.", real_path.c_str()); | |||||
| return; | |||||
| } | |||||
| while ((dent = readdir(dir)) != nullptr) { | |||||
| if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; | |||||
| string name = dent->d_name; | |||||
| string full_name = real_path + "/" + name; | |||||
| const string so_suff = ".so"; | |||||
| const string caffe_parser_so_suff = "lib_caffe_parser.so"; | |||||
| const string aicpu_so_suff = "_aicpu.so"; | |||||
| const string aicpu_host_so_suff = "_online.so"; | |||||
| if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { | |||||
| ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff, | |||||
| aicpu_host_so_suff); | |||||
| } else { | |||||
| FindParserSo(full_name, file_list, caffe_parser_path); | |||||
| } | |||||
| } | |||||
| closedir(dir); | |||||
| } | |||||
| void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) { | |||||
| // Support to split multiple so directories by ":" | |||||
| vector<string> v_path = StringUtils::Split(path, ':'); | |||||
| for (size_t i = 0; i < v_path.size(); ++i) { | |||||
| FindParserSo(v_path[i], file_list, caffe_parser_path); | |||||
| GELOGI("CustomOpLib full name = %s", v_path[i].c_str()); | |||||
| } | |||||
| } | |||||
| void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { | |||||
| GELOGI("Enter get custom op path schedule"); | |||||
| std::string fmk_type; | |||||
| domi::FrameworkType type = domi::TENSORFLOW; | |||||
| auto it = options_.find(FRAMEWORK_TYPE); | |||||
| if (it != options_.end()) { | |||||
| type = static_cast<domi::FrameworkType>(std::strtol(it->second.c_str(), nullptr, 10)); | |||||
| } | |||||
| fmk_type = ge::TypeUtils::FmkTypeToSerialString(type); | |||||
| GELOGI("Framework type is %s.", fmk_type.c_str()); | |||||
| const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||||
| if (path_env != nullptr) { | |||||
| std::string path = path_env; | |||||
| customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type); | |||||
| GELOGI("Get custom so path from env : %s", path_env); | |||||
| return; | |||||
| } | |||||
| std::string path_base = GetPath(); | |||||
| GELOGI("path_base is %s", path_base.c_str()); | |||||
| path_base = path_base.substr(0, path_base.rfind('/')); | |||||
| path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||||
| customop_path = (path_base + "ops/framework/custom" + "/:") + (path_base + "ops/framework/built-in/" + fmk_type); | |||||
| return; | |||||
| } | |||||
| void TBEPluginManager::LoadCustomOpLib() { | |||||
| LoadPluginSo(); | |||||
| std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | |||||
| GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | |||||
| for (OpRegistrationData reg_data : registration_datas) { | |||||
| bool ret = CheckRegisterStatus(reg_data); | |||||
| if (ret) { | |||||
| GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), | |||||
| static_cast<uint32_t>(reg_data.GetImplyType())); | |||||
| domi::OpRegistry::Instance()->Register(reg_data); | |||||
| } | |||||
| } | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() { | |||||
| vector<string> file_list; | |||||
| string caffe_parser_path; | |||||
| std::string plugin_path; | |||||
| GetCustomOpPath(plugin_path); | |||||
| // Whether there are files in the plugin so path | |||||
| GetPluginSoFileList(plugin_path, file_list, caffe_parser_path); | |||||
| // No file | |||||
| if (file_list.empty()) { | |||||
| // Print log | |||||
| GELOGW("Can not find any plugin file in plugin_path: %s", plugin_path.c_str()); | |||||
| } | |||||
| GELOGW("The shared library will not be checked. Please ensure that the source of the shared library is trusted."); | |||||
| // Load other so files except lib_caffe_parser.so in the plugin so path | |||||
| for (auto elem : file_list) { | |||||
| StringUtils::Trim(elem); | |||||
| void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); | |||||
| if (handle == nullptr) { | |||||
| GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), dlerror()); | |||||
| } else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) { | |||||
| // Close dl when the program exist, not close here | |||||
| GELOGI("Plugin load %s success.", elem.c_str()); | |||||
| handles_vec_.push_back(handle); | |||||
| } else { | |||||
| GELOGI("Plugin so has already been loaded, no need to load again."); | |||||
| } | |||||
| } | |||||
| } | |||||
| bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData ®_data) { | |||||
| bool ret = true; | |||||
| static char *parser_priority = std::getenv("PARSER_PRIORITY"); | |||||
| static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce"; | |||||
| auto ori_optype_set = reg_data.GetOriginOpTypeSet(); | |||||
| for (const auto &op_type : ori_optype_set) { | |||||
| domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type); | |||||
| GELOGD("Enter into reg_data loop. op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str()); | |||||
| if (imply_type != domi::ImplyType::BUILDIN) { | |||||
| if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) || | |||||
| (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) { | |||||
| GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(), | |||||
| reg_data.GetOmOptype().c_str()); | |||||
| ret = false; | |||||
| } else { | |||||
| GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str()); | |||||
| } | |||||
| } else { | |||||
| GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(), | |||||
| reg_data.GetOmOptype().c_str(), static_cast<int>(reg_data.GetImplyType())); | |||||
| } | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| Status TBEPluginManager::CheckCustomAiCpuOpLib() { | |||||
| std::vector<std::string> vec_op_type; | |||||
| domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); | |||||
| for (size_t i = 0; i < vec_op_type.size(); i++) { | |||||
| bool aicpu_so_exist = false; | |||||
| std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; | |||||
| for (size_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { | |||||
| string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; | |||||
| if (bin_file_path.size() >= ai_cpu_so_name.size() && | |||||
| bin_file_path.compare(bin_file_path.size() - ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == | |||||
| 0) { | |||||
| aicpu_so_exist = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (!aicpu_so_exist) { | |||||
| GELOGE(FAILED, "Can't find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::InitPreparation( | |||||
| const std::map<string, string> &options) { | |||||
| options_.insert(options.begin(), options.end()); | |||||
| // Load TBE plugin | |||||
| TBEPluginManager::Instance().LoadCustomOpLib(); | |||||
| Status ret = CheckCustomAiCpuOpLib(); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Check custom aicpu run so failed!"); | |||||
| return; | |||||
| } | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,73 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | |||||
| #define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | |||||
| #include <dlfcn.h> | |||||
| #include <functional> | |||||
| #include <iostream> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <type_traits> | |||||
| #include <typeinfo> | |||||
| #include <vector> | |||||
| #include "external/ge/ge_api_error_codes.h" | |||||
| #include "external/register/register.h" | |||||
| namespace ge { | |||||
| using SoHandlesVec = std::vector<void *>; | |||||
| using std::function; | |||||
| using std::map; | |||||
| using std::string; | |||||
| using std::vector; | |||||
// Singleton that locates, dlopen()s and tracks TBE / parser plugin shared
// libraries, and reconciles op registrations loaded from them.
class TBEPluginManager {
 public:
  // Releases resources held by the manager (dlopen handles via ClearHandles_).
  Status Finalize();

  // Get TBEPluginManager singleton instance.
  static TBEPluginManager &Instance();

  // Path of the directory that contains the current module.
  static string GetPath();

  // Stores the session options and loads all custom op plugin libraries;
  // logs an error if a custom aicpu so is missing.
  static void InitPreparation(const std::map<string, string> &options);

  // dlopen()s every so file found under the custom op search path; handles
  // are kept in handles_vec_ and not closed until Finalize.
  void LoadPluginSo();

 private:
  TBEPluginManager() = default;
  ~TBEPluginManager() = default;
  // Closes all handles in handles_vec_.
  Status ClearHandles_();

  // Classifies one .so full name into file_list / caffe_parser_path buckets.
  static void ProcessSoFullName(vector<string> &file_list, string &caffe_parser_path, string &full_name,
                                const string &caffe_parser_so_suff, const string &aicpu_so_suff,
                                const string &aicpu_host_so_suff);
  // Recursively walks `path` collecting plugin so files.
  static void FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path);
  // Splits a ":"-separated path list and runs FindParserSo on each entry.
  static void GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path);
  // Computes the custom op search path from ASCEND_OPP_PATH or the module path.
  static void GetCustomOpPath(std::string &customop_path);
  // Loads plugin so files and re-registers eligible op registrations.
  void LoadCustomOpLib();
  // Verifies a lib<op>_aicpu.so exists for every CUSTOM imply-type op.
  static Status CheckCustomAiCpuOpLib();
  // Decides whether reg_data should (re-)register, honoring PARSER_PRIORITY.
  static bool CheckRegisterStatus(const OpRegistrationData &reg_data);

  // Open dlopen handles, owned until Finalize.
  SoHandlesVec handles_vec_;
  // Session options captured by InitPreparation (shared across instances).
  static std::map<string, string> options_;
};
| } // namespace ge | |||||
| #endif // GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | |||||
| @@ -0,0 +1,241 @@ | |||||
LOCAL_PATH := $(call my-dir)

# Source files shared by every libge_common flavour built below.
GE_COMMON_LOCAL_SRC_FILES := \
    context/ctx.cc \
    model_saver.cc \
    ge/datatype_util.cc \
    helper/om_file_helper.cc \
    helper/model_helper.cc \
    ../model/ge_model.cc \
    auth/file_saver.cc \
    fp16_t.cc \
    math/fp16_math.cc \
    debug/memory_dumper.cc \
    formats/utils/formats_trans_utils.cc \
    formats/format_transfers/datatype_transfer.cc \
    formats/format_transfers/format_transfer_transpose.cc \
    formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
    formats/format_transfers/format_transfer_fractal_z.cc \
    formats/format_transfers/format_transfer_fractal_nz.cc \
    formats/format_transfers/format_transfer_fractal_zz.cc \
    formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
    formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
    formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
    formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
    formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
    formats/format_transfers/format_transfer_fracz_nchw.cc \
    formats/format_transfers/format_transfer_fracz_nhwc.cc \
    formats/format_transfers/format_transfer_fracz_hwcn.cc \
    formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
    formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
    formats/format_transfers/format_transfer_nchw_fz_c04.cc \
    formats/formats.cc \
    ge_format_util.cc \
    fmk_error_codes.cc \
    util.cc \
    properties_manager.cc \
    types.cc \
    model_parser/base.cc \
    tbe_kernel_store.cc \
    op/attr_value_util.cc \
    op/ge_op_utils.cc \
    thread_pool.cc \
    ge/tbe_plugin_manager.cc \

# Proto files and include directories shared by all flavours.
GE_COMMON_LOCAL_C_INCLUDES := \
    proto/om.proto \
    proto/ge_ir.proto \
    proto/task.proto \
    proto/insert_op.proto \
    proto/tensorflow/graph.proto \
    proto/tensorflow/node_def.proto \
    proto/tensorflow/function.proto \
    proto/tensorflow/versions.proto \
    proto/tensorflow/attr_value.proto \
    proto/tensorflow/tensor.proto \
    proto/tensorflow/tensor_shape.proto \
    proto/tensorflow/op_def.proto \
    proto/tensorflow/types.proto \
    proto/tensorflow/resource_handle.proto \
    $(TOPDIR)inc \
    $(TOPDIR)inc/external \
    $(TOPDIR)inc/external/graph \
    $(TOPDIR)inc/framework \
    $(TOPDIR)inc/common/util \
    $(TOPDIR)libc_sec/include \
    $(TOPDIR)third_party/json/include \
    $(TOPDIR)third_party/protobuf/include \
    $(TOPDIR)third_party/openssl/include/x86/include \
    $(TOPDIR)framework/domi \
    $(TOPDIR)framework/domi/common \
    $(TOPDIR)framework/domi/common/op

# compile host libge_common (shared library)
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
# Release host build: hide symbols except those explicitly exported.
LOCAL_CFLAGS += -fvisibility=hidden -DHOST_VISIBILITY
endif
# EulerOS/CentOS hosts and targets share the CentOS-specific code paths.
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
    libprotobuf \
    libc_sec \
    libslog \
    libmmpa \
    libgraph \
    libregister \
    liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_SHARED_LIBRARY)

# compile device libge_common (shared library)
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
# Release device build: hide symbols except those explicitly exported.
LOCAL_CFLAGS += -fvisibility=hidden -DDEV_VISIBILITY
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
    libprotobuf \
    libc_sec \
    libslog \
    libmmpa \
    libgraph \
    libregister \
    liberror_manager \

# Android devices have no librt; log support comes from the NDK sysroot.
ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS := -lrt -ldl
endif

include $(BUILD_SHARED_LIBRARY)

# compile host libge_common static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

# graph and protobuf are linked statically into the static archive's users.
LOCAL_STATIC_LIBRARIES := \
    libgraph \
    libprotobuf \

LOCAL_SHARED_LIBRARIES := \
    libc_sec \
    libslog \
    libmmpa \
    libregister \
    liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_STATIC_LIBRARY)

# compile device libge_common static_lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
    libgraph \
    libprotobuf \

LOCAL_SHARED_LIBRARIES := \
    libc_sec \
    libslog \
    libmmpa \
    libregister \
    liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_STATIC_LIBRARY)
| @@ -178,7 +178,7 @@ bool ModelCacheHelper::IsModelCacheHit() const { | |||||
| return false; | return false; | ||||
| } | } | ||||
| if (!IsVarManagerSameAsCache(var_manager_json)) { | if (!IsVarManagerSameAsCache(var_manager_json)) { | ||||
| GELOGI("Graph id[%u] cache miss: the VarManager dos not match the cache info.", graph_id_); | |||||
| GELOGI("Graph id[%u] cache miss: the VarManager does not match the cache info.", graph_id_); | |||||
| return false; | return false; | ||||
| } | } | ||||
| GELOGI("Graph id[%u] cache hit.", graph_id_); | GELOGI("Graph id[%u] cache hit.", graph_id_); | ||||
| @@ -563,7 +563,7 @@ Status ModelCacheHelper::GetCacheInfo(CacheInfo &cache_info) const { | |||||
| cache_info.graph_hash = cache_json[kGraphHash]; | cache_info.graph_hash = cache_json[kGraphHash]; | ||||
| Json nodes_hash_json = cache_json[kNodeHash]; | Json nodes_hash_json = cache_json[kNodeHash]; | ||||
| if (!(nodes_hash_json.is_null() || nodes_hash_json.is_array())) { | if (!(nodes_hash_json.is_null() || nodes_hash_json.is_array())) { | ||||
| GELOGW("Nodes hash in cache be null or array."); | |||||
| GELOGW("Nodes hash in cache should be null or array."); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| for (const auto &iter : nodes_hash_json) { | for (const auto &iter : nodes_hash_json) { | ||||
| @@ -1670,7 +1670,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const { | |||||
| ModelData model_data; | ModelData model_data; | ||||
| ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data); | ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGW("LoadOmModelFromCache: Load model from file fialed. ret = %u", ret); | |||||
| GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -144,7 +144,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
| err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), | err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), | ||||
| platform_version.size() + 1); | platform_version.size() + 1); | ||||
| if (err != EOK) { | if (err != EOK) { | ||||
| GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while while allocating memory for platform_version"); | |||||
| GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while allocating memory for platform_version."); | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } | } | ||||
| string version = reinterpret_cast<char *>(model_header.platform_version); | string version = reinterpret_cast<char *>(model_header.platform_version); | ||||
| @@ -52,7 +52,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(u | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type, | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type, | ||||
| ModelPartition &partition) { | ModelPartition &partition) { | ||||
| if (!is_inited_) { | if (!is_inited_) { | ||||
| GELOGE(PARAM_INVALID, "OmFileLoadHelper not Inited!"); | |||||
| GELOGE(PARAM_INVALID, "OmFileLoadHelper has not been initialized!"); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -67,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod | |||||
| if (!found) { | if (!found) { | ||||
| if (type != ModelPartitionType::TBE_KERNELS) { | if (type != ModelPartitionType::TBE_KERNELS) { | ||||
| GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas", static_cast<int>(type)); | |||||
| GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type)); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| @@ -77,7 +77,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod | |||||
| Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { | Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { | ||||
| // Parameter validity check | // Parameter validity check | ||||
| if (model.model_data == nullptr) { | if (model.model_data == nullptr) { | ||||
| GELOGE(PARAM_INVALID, "Model_data must not be null"); | |||||
| GELOGE(PARAM_INVALID, "Model_data must not be null!"); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -103,7 +103,7 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { | |||||
| Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size) { | Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size) { | ||||
| if (model_data == nullptr) { | if (model_data == nullptr) { | ||||
| GELOGE(PARAM_INVALID, "Param model_data must not be null"); | |||||
| GELOGE(PARAM_INVALID, "Param model_data must not be null!"); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| // Init partition table | // Init partition table | ||||
| @@ -131,7 +131,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||||
| context_.partition_datas_.push_back(partition); | context_.partition_datas_.push_back(partition); | ||||
| if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { | if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { | ||||
| GELOGE(PARAM_INVALID, "the current need partition sizes %zu greater than the model data size %u ", | |||||
| GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.", | |||||
| partition.size + mem_offset, model_data_size); | partition.size + mem_offset, model_data_size); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -199,7 +199,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat | |||||
| ModelPartitionTable *partition_table = GetPartitionTable(); | ModelPartitionTable *partition_table = GetPartitionTable(); | ||||
| if (partition_table == nullptr) { | if (partition_table == nullptr) { | ||||
| GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile exe failed: partition_table is NULL"); | |||||
| GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile execute failed: partition_table is NULL."); | |||||
| return ge::GE_GRAPH_SAVE_FAILED; | return ge::GE_GRAPH_SAVE_FAILED; | ||||
| } | } | ||||
| uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "framework/common/util.h" | #include "framework/common/util.h" | ||||
| #include "common/util/error_manager/error_manager.h" | |||||
| namespace ge { | namespace ge { | ||||
| const uint32_t kInteval = 2; | const uint32_t kInteval = 2; | ||||
| @@ -41,10 +42,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi | |||||
| try { | try { | ||||
| model_str = model.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | model_str = model.dump(kInteval, ' ', false, Json::error_handler_t::ignore); | ||||
| } catch (std::exception &e) { | } catch (std::exception &e) { | ||||
| GELOGE(FAILED, "Transfer json to string failed, reason: %s.", e.what()); | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19007", {"exception"}, {e.what()}); | |||||
| GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); | |||||
| return FAILED; | return FAILED; | ||||
| } catch (...) { | } catch (...) { | ||||
| GELOGE(FAILED, "Transfer json to string failed."); | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19008"); | |||||
| GELOGE(FAILED, "Failed to convert JSON to string."); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -57,6 +60,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi | |||||
| mode_t mode = S_IRUSR | S_IWUSR; | mode_t mode = S_IRUSR | S_IWUSR; | ||||
| int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | ||||
| if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"filepath", "errMsg"}, {file_path, strerror(errno)}); | |||||
| GELOGE(FAILED, "Open file failed. file path : %s, %s", file_path, strerror(errno)); | GELOGE(FAILED, "Open file failed. file path : %s, %s", file_path, strerror(errno)); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| @@ -65,6 +69,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi | |||||
| // Write data to file | // Write data to file | ||||
| mmSsize_t mmpa_ret = mmWrite(fd, const_cast<void *>((const void *)model_char), len); | mmSsize_t mmpa_ret = mmWrite(fd, const_cast<void *>((const void *)model_char), len); | ||||
| if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { | if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"mmpa_ret", "errMsg"}, | |||||
| {std::to_string(mmpa_ret), strerror(errno)}); | |||||
| // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose | // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose | ||||
| GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno)); | GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno)); | ||||
| ret = FAILED; | ret = FAILED; | ||||
| @@ -0,0 +1,3 @@ | |||||
| LOCAL_PATH := $(call my-dir) | |||||
| include $(LOCAL_PATH)/ge_common.mk | |||||
| @@ -114,6 +114,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status | |||||
| OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::AippOpParams *aipp_params) { | OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::AippOpParams *aipp_params) { | ||||
| GE_CHECK_NOTNULL(aipp_params); | GE_CHECK_NOTNULL(aipp_params); | ||||
| AIPP_CONVERT_FORMAT_EX(aipp_mode, domi::AippOpParams::AippMode, int32_t, GeAttrValue::INT); | AIPP_CONVERT_FORMAT_EX(aipp_mode, domi::AippOpParams::AippMode, int32_t, GeAttrValue::INT); | ||||
| AIPP_CONVERT_INT(related_input_rank); | |||||
| if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) { | if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) { | ||||
| AIPP_CONVERT_INT(max_src_image_size); | AIPP_CONVERT_INT(max_src_image_size); | ||||
| @@ -149,6 +150,7 @@ OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::Aipp | |||||
| AIPP_CONVERT_LIST_FLOAT(var_reci_chn_0, true); | AIPP_CONVERT_LIST_FLOAT(var_reci_chn_0, true); | ||||
| AIPP_CONVERT_LIST_FLOAT(var_reci_chn_1, true); | AIPP_CONVERT_LIST_FLOAT(var_reci_chn_1, true); | ||||
| AIPP_CONVERT_LIST_FLOAT(var_reci_chn_2, true); | AIPP_CONVERT_LIST_FLOAT(var_reci_chn_2, true); | ||||
| AIPP_CONVERT_LIST_FLOAT(var_reci_chn_3, true); | |||||
| const bool csc_switch = aipp_params->csc_switch(); | const bool csc_switch = aipp_params->csc_switch(); | ||||
| AIPP_CONVERT_LIST_INT(matrix_r0c0, csc_switch); | AIPP_CONVERT_LIST_INT(matrix_r0c0, csc_switch); | ||||
| @@ -478,24 +478,32 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn | |||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( | ||||
| const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) { | ||||
| #ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
| int32_t device_id = 0; | |||||
| rtError_t rt_ret = rtGetDevice(&device_id); | |||||
| int32_t logic_device_id = 0; | |||||
| rtError_t rt_ret = rtGetDevice(&logic_device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(rt_ret, "runtime get device_id failed, current device_id:%d", device_id); | |||||
| GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | |||||
| return; | return; | ||||
| } | } | ||||
| GELOGI("current device_id:%d", device_id); | |||||
| GELOGI("current logic_device_id:%d", logic_device_id); | |||||
| auto ret = std::find(device_id_.begin(), device_id_.end(), device_id); | |||||
| uint32_t phy_device_id = 0; | |||||
| rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
| return; | |||||
| } | |||||
| GELOGI("current phy_device_id:%d", phy_device_id); | |||||
| auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
| if (ret == device_id_.end()) { | if (ret == device_id_.end()) { | ||||
| GELOGE(FAILED, "get valid device_id failed, profiling report failed."); | |||||
| GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | |||||
| return; | return; | ||||
| } | } | ||||
| GELOGI("start ProfilingTaskDescInfo."); | GELOGI("start ProfilingTaskDescInfo."); | ||||
| ProfilingTaskDescInfo(task_desc_info, device_id); | |||||
| ProfilingTaskDescInfo(task_desc_info, phy_device_id); | |||||
| GELOGI("start ProfilingGraphDescInfo."); | GELOGI("start ProfilingGraphDescInfo."); | ||||
| ProfilingGraphDescInfo(compute_graph_desc_info, device_id); | |||||
| ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id); | |||||
| GELOGI("Report profiling data for GE end."); | GELOGI("Report profiling data for GE end."); | ||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -116,6 +116,7 @@ REGISTER_OPTYPE_DEFINE(SLICE, "Slice"); | |||||
| REGISTER_OPTYPE_DEFINE(SLICED, "SliceD"); | REGISTER_OPTYPE_DEFINE(SLICED, "SliceD"); | ||||
| REGISTER_OPTYPE_DEFINE(FLOORDIV, "FloorDiv"); | REGISTER_OPTYPE_DEFINE(FLOORDIV, "FloorDiv"); | ||||
| REGISTER_OPTYPE_DEFINE(SQUEEZE, "Squeeze"); | REGISTER_OPTYPE_DEFINE(SQUEEZE, "Squeeze"); | ||||
| REGISTER_OPTYPE_DEFINE(UNSQUEEZE, "Unsqueeze"); | |||||
| REGISTER_OPTYPE_DEFINE(STRIDEDSLICE, "StridedSlice"); | REGISTER_OPTYPE_DEFINE(STRIDEDSLICE, "StridedSlice"); | ||||
| REGISTER_OPTYPE_DEFINE(RANGE, "Range"); | REGISTER_OPTYPE_DEFINE(RANGE, "Range"); | ||||
| REGISTER_OPTYPE_DEFINE(RPNPROPOSALS, "RpnProposals"); | REGISTER_OPTYPE_DEFINE(RPNPROPOSALS, "RpnProposals"); | ||||
| @@ -67,9 +67,8 @@ static bool ReadProtoFromCodedInputStream(CodedInputStream &coded_stream, Messag | |||||
| } | } | ||||
| FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(const char *file, Message *proto) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(const char *file, Message *proto) { | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr), | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19001"); | |||||
| return false, "Input parameter file or proto is nullptr!"); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr), return false, | |||||
| "Input parameter file or proto is nullptr!"); | |||||
| std::string real_path = RealPath(file); | std::string real_path = RealPath(file); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file); | ||||
| @@ -119,8 +118,9 @@ long GetFileLength(const std::string &input_file) { | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10037", {"filepath"}, {input_file}); | ErrorManager::GetInstance().ATCReportErrMessage("E10037", {"filepath"}, {input_file}); | ||||
| return -1, "Open file[%s] failed", input_file.c_str()); | return -1, "Open file[%s] failed", input_file.c_str()); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E10038"); | |||||
| return -1, "File[%s] length is 0, not valid.", input_file.c_str()); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10038", {"filepath"}, {input_file}); | |||||
| return -1, "File[%s] size is 0, not valid.", input_file.c_str()); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage( | file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage( | ||||
| @@ -207,7 +207,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std:: | |||||
| if (dir_path_len >= PATH_MAX) { | if (dir_path_len >= PATH_MAX) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, | ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, | ||||
| {directory_path, std::to_string(PATH_MAX)}); | {directory_path, std::to_string(PATH_MAX)}); | ||||
| GELOGW("Path[%s] len is too long, it must smaller than %d", directory_path.c_str(), PATH_MAX); | |||||
| GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), PATH_MAX); | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| char tmp_dir_path[PATH_MAX] = {0}; | char tmp_dir_path[PATH_MAX] = {0}; | ||||
| @@ -338,14 +338,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
| strlen(path) >= PATH_MAX, | strlen(path) >= PATH_MAX, | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); | ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); | ||||
| return "", "Path[%s] len is too long, it must smaller than %d", path, PATH_MAX); | |||||
| return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); | |||||
| // PATH_MAX is the system's own macro, indicating the maximum file path length supported | // PATH_MAX is the system's own macro, indicating the maximum file path length supported | ||||
| std::shared_ptr<char> resolved_path(new (std::nothrow) char[PATH_MAX](), std::default_delete<char[]>()); | std::shared_ptr<char> resolved_path(new (std::nothrow) char[PATH_MAX](), std::default_delete<char[]>()); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| resolved_path == nullptr, | |||||
| ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); | |||||
| return "", "Path[%s] new string object len[%d] failed.", path, PATH_MAX); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(resolved_path == nullptr, return "", "Path[%s] new string object len[%d] failed.", | |||||
| path, PATH_MAX); | |||||
| // Nullptr is returned when the path does not exist or there is no permission | // Nullptr is returned when the path does not exist or there is no permission | ||||
| // Return absolute path when path is accessible | // Return absolute path when path is accessible | ||||
| @@ -384,7 +382,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||||
| !ValidateStr(real_path, mode), | !ValidateStr(real_path, mode), | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path}); | ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path}); | ||||
| return false, | return false, | ||||
| "Input parameter's value[%s] is illegal. The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " | |||||
| "Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " | |||||
| "and chinese character.", | "and chinese character.", | ||||
| atc_param.c_str(), real_path.c_str()); | atc_param.c_str(), real_path.c_str()); | ||||
| @@ -420,7 +418,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||||
| !ValidateStr(real_path, mode), | !ValidateStr(real_path, mode), | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path}); | ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path}); | ||||
| return false, | return false, | ||||
| "Input parameter's value[%s] is illegal. The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " | |||||
| "Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " | |||||
| "and chinese character.", | "and chinese character.", | ||||
| atc_param.c_str(), real_path.c_str()); | atc_param.c_str(), real_path.c_str()); | ||||
| @@ -75,7 +75,7 @@ Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &op | |||||
| return status; | return status; | ||||
| } | } | ||||
| GELOGI("The number of DNNEngineObjs are %zu.", engines_map_.size()); | |||||
| GELOGI("The number of DNNEngineObjs is %zu.", engines_map_.size()); | |||||
| // Engines initialize | // Engines initialize | ||||
| for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) { | for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) { | ||||
| @@ -373,7 +373,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||||
| GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno)); | GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno)); | ||||
| return FAILED; | return FAILED; | ||||
| } else { | } else { | ||||
| GELOGW("The json file %s is not need", file_path.c_str()); | |||||
| GELOGW("The json file %s is not needed.", file_path.c_str()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } | } | ||||
| @@ -30,6 +30,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
| "../common/profiling/profiling_manager.cc" | "../common/profiling/profiling_manager.cc" | ||||
| "../graph/execute/graph_execute.cc" | "../graph/execute/graph_execute.cc" | ||||
| "../graph/load/graph_loader.cc" | "../graph/load/graph_loader.cc" | ||||
| "../graph/load/new_model_manager/aipp_utils.cc" | |||||
| "../graph/load/new_model_manager/cpu_queue_schedule.cc" | "../graph/load/new_model_manager/cpu_queue_schedule.cc" | ||||
| "../graph/load/new_model_manager/data_dumper.cc" | "../graph/load/new_model_manager/data_dumper.cc" | ||||
| "../graph/load/new_model_manager/data_inputer.cc" | "../graph/load/new_model_manager/data_inputer.cc" | ||||
| @@ -38,6 +38,7 @@ | |||||
| namespace { | namespace { | ||||
| const size_t kDynamicBatchSizeVecSize = 1; | const size_t kDynamicBatchSizeVecSize = 1; | ||||
| const size_t kStaticBatchInfoSize = 1; | |||||
| const size_t kDynamicImageSizeVecSize = 2; | const size_t kDynamicImageSizeVecSize = 2; | ||||
| const size_t kDynamicImageSizeInputSize = 2; | const size_t kDynamicImageSizeInputSize = 2; | ||||
| const char *const kBatchLabel = "Batch_"; | const char *const kBatchLabel = "Batch_"; | ||||
| @@ -180,16 +181,16 @@ class ModelListenerAdapter : public ModelListener { | |||||
| GeExecutor::GeExecutor() {} | GeExecutor::GeExecutor() {} | ||||
| Status GeExecutor::Initialize() { | Status GeExecutor::Initialize() { | ||||
| GELOGI("Init ge_executor begin."); | |||||
| GELOGI("Init GeExecutor begin."); | |||||
| if (isInit_) { | if (isInit_) { | ||||
| GELOGW("Already inited, don't need to init again."); | |||||
| GELOGW("Already initialized, no need to be initialized again."); | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM); | std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM); | ||||
| auto ret = MemManager::Instance().Initialize(mem_type); | auto ret = MemManager::Instance().Initialize(mem_type); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Memory Manager init fail."); | |||||
| GELOGE(ret, "Memory Manager init failed."); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -200,14 +201,14 @@ Status GeExecutor::Initialize() { | |||||
| ProfilingManager::Instance().Init(profiling_options); | ProfilingManager::Instance().Init(profiling_options); | ||||
| isInit_ = true; | isInit_ = true; | ||||
| GELOGI("Init ge_executor over."); | |||||
| GELOGI("Init GeExecutor over."); | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| Status GeExecutor::Finalize() { | Status GeExecutor::Finalize() { | ||||
| GELOGI("Uninit ge_executor begin."); | |||||
| GELOGI("Uninit GeExecutor begin."); | |||||
| if (isInit_ == false) { | if (isInit_ == false) { | ||||
| GELOGW("ge_executor needs to init begin."); | |||||
| GELOGW("GeExecutor has not been initialized."); | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| @@ -217,7 +218,7 @@ Status GeExecutor::Finalize() { | |||||
| ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); | ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); | ||||
| } | } | ||||
| GELOGI("Uninit ge_executor over."); | |||||
| GELOGI("Uninit GeExecutor over."); | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| @@ -236,6 +237,7 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad | |||||
| // Verify whether the input dynamic batch matches the model gear | // Verify whether the input dynamic batch matches the model gear | ||||
| std::vector<std::vector<int64_t>> batch_info; | std::vector<std::vector<int64_t>> batch_info; | ||||
| std::vector<uint64_t> batch_num{batch_size}; | |||||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(FAILED, "Get dynamic input info failed."); | GELOGE(FAILED, "Get dynamic input info failed."); | ||||
| @@ -247,6 +249,11 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| ret = GraphExecutor::SetDynamicSize(model_id, batch_num); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "Set dynamic size failed"); | |||||
| return FAILED; | |||||
| } | |||||
| // memcpy dynamic_batch_size from host to device | // memcpy dynamic_batch_size from host to device | ||||
| if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | ||||
| GELOGE(FAILED, "memcpy dynamic batch input data failed!"); | GELOGE(FAILED, "memcpy dynamic batch input data failed!"); | ||||
| @@ -270,6 +277,7 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||||
| // Verify whether the input dynamic resolution matches the model gear | // Verify whether the input dynamic resolution matches the model gear | ||||
| std::vector<std::vector<int64_t>> batch_info; | std::vector<std::vector<int64_t>> batch_info; | ||||
| std::vector<uint64_t> batch_num{image_height, image_width}; | |||||
| Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(FAILED, "Get dynamic input info failed."); | GELOGE(FAILED, "Get dynamic input info failed."); | ||||
| @@ -281,6 +289,11 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| ret = GraphExecutor::SetDynamicSize(model_id, batch_num); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "Set dynamic size failed"); | |||||
| return FAILED; | |||||
| } | |||||
| // Memcpy dynamic resolution height from host to device | // Memcpy dynamic resolution height from host to device | ||||
| if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != | if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != | ||||
| RT_ERROR_NONE) { | RT_ERROR_NONE) { | ||||
| @@ -298,6 +311,20 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||||
| GELOGI("Begin to get current shape"); | |||||
| if (!isInit_) { | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | |||||
| } | |||||
| Status ret = GraphExecutor::GetCurShape(model_id, batch_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "Get current shape failed"); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | ||||
| const std::vector<kAippDynamicBatchPara> &aippBatchPara, | const std::vector<kAippDynamicBatchPara> &aippBatchPara, | ||||
| const kAippDynamicPara &aippParms) { | const kAippDynamicPara &aippParms) { | ||||
| @@ -346,13 +373,13 @@ Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, | |||||
| int32_t priority, std::shared_ptr<ge::ModelListener> listener) { | int32_t priority, std::shared_ptr<ge::ModelListener> listener) { | ||||
| GELOGI("load model offline begin."); | GELOGI("load model offline begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| string filePath = RealPath(path.c_str()); | string filePath = RealPath(path.c_str()); | ||||
| if (filePath.empty()) { | if (filePath.empty()) { | ||||
| GELOGE(ge::FAILED, "fileath is invalid. please check your text file '%s'.", path.c_str()); | |||||
| GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str()); | |||||
| return ge::FAILED; | return ge::FAILED; | ||||
| } | } | ||||
| @@ -375,7 +402,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, | |||||
| std::shared_ptr<ge::ModelListener> listener) { | std::shared_ptr<ge::ModelListener> listener) { | ||||
| GELOGI("Load model begin."); | GELOGI("Load model begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -397,7 +424,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, | |||||
| Status GeExecutor::UnloadModel(uint32_t model_id) { | Status GeExecutor::UnloadModel(uint32_t model_id) { | ||||
| GELOGI("unload model %u begin.", model_id); | GELOGI("unload model %u begin.", model_id); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | ||||
| @@ -411,7 +438,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||||
| Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) { | Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) { | ||||
| GELOGI("run model begin."); | GELOGI("run model begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -428,7 +455,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| std::vector<ge::TensorDesc> &output_desc) { | std::vector<ge::TensorDesc> &output_desc) { | ||||
| GELOGI("get model desc info begin."); | GELOGI("get model desc info begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -436,12 +463,11 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| std::vector<InputOutputDescInfo> output_desc_infos; | std::vector<InputOutputDescInfo> output_desc_infos; | ||||
| std::vector<uint32_t> input_formats; | std::vector<uint32_t> input_formats; | ||||
| std::vector<uint32_t> output_formats; | std::vector<uint32_t> output_formats; | ||||
| GELOGI("GetInputOutputDescInfo via new ome."); | |||||
| Status ret = | Status ret = | ||||
| GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats); | GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats); | ||||
| if (ret != domi::SUCCESS) { | if (ret != domi::SUCCESS) { | ||||
| GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | |||||
| GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | |||||
| return TransferDomiErrorCode(ret); | return TransferDomiErrorCode(ret); | ||||
| } | } | ||||
| @@ -473,7 +499,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
| Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | ||||
| GELOGI("Begin to get dynamic batch info."); | GELOGI("Begin to get dynamic batch info."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -487,11 +513,49 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get AIPP input format | |||||
| /// @param [in] model_id | |||||
| /// @param [in] index | |||||
| /// @param [out] input_format | |||||
| /// @return execute result | |||||
| /// | |||||
| Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||||
| GELOGI("Begin to GetAIPPInfo."); | |||||
| if (!isInit_) { | |||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| return GE_EXEC_NOT_INIT; | |||||
| } | |||||
| Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetAIPPInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("GetAIPPInfo succ."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | |||||
| GELOGI("Begin to get dynamic batch output shape info"); | |||||
| if (!isInit_) { | |||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| return GE_EXEC_NOT_INIT; | |||||
| } | |||||
| Status ret = GraphExecutor::GetModelAttr(model_id, dynamic_output_shape_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Get dynamic batch output shape info failed."); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("Get dynamic batch output shape info succ."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
| std::vector<TensorDesc> &output_desc) { | std::vector<TensorDesc> &output_desc) { | ||||
| GELOGI("get model desc info for zero copy begin."); | GELOGI("get model desc info for zero copy begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -499,12 +563,11 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge | |||||
| std::vector<InputOutputDescInfo> output_desc_infos; | std::vector<InputOutputDescInfo> output_desc_infos; | ||||
| std::vector<uint32_t> input_formats; | std::vector<uint32_t> input_formats; | ||||
| std::vector<uint32_t> output_formats; | std::vector<uint32_t> output_formats; | ||||
| GELOGI("GetInputOutputDescInfoForZeroCopy via new ome."); | |||||
| Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos, | Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos, | ||||
| input_formats, output_formats); | input_formats, output_formats); | ||||
| if (ret != domi::SUCCESS) { | if (ret != domi::SUCCESS) { | ||||
| GELOGE(ret, "Get DescInfo For ZeroCopy failed. ret = %u", ret); | |||||
| GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret); | |||||
| return TransferDomiErrorCode(ret); | return TransferDomiErrorCode(ret); | ||||
| } | } | ||||
| @@ -521,7 +584,7 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge | |||||
| GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); | GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); | ||||
| GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); | GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); | ||||
| GELOGI("get model desc info for zero copy end."); | |||||
| GELOGI("get model desc info from zero copy end."); | |||||
| return ge::SUCCESS; | return ge::SUCCESS; | ||||
| } | } | ||||
| @@ -539,7 +602,7 @@ Status GeExecutor::CommandHandle(const Command &command) { | |||||
| Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | ||||
| GELOGI("Get max used memory begin."); | GELOGI("Get max used memory begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -559,13 +622,13 @@ Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) { | |||||
| Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_data) { | Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_data) { | ||||
| GELOGI("Load data from file begin."); | GELOGI("Load data from file begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| string filePath = RealPath(path.c_str()); | string filePath = RealPath(path.c_str()); | ||||
| if (filePath.empty()) { | if (filePath.empty()) { | ||||
| GELOGE(ge::FAILED, "filePath is invalid. please check your text file '%s'.", path.c_str()); | |||||
| GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str()); | |||||
| return ge::FAILED; | return ge::FAILED; | ||||
| } | } | ||||
| GELOGI("load modelData from file: %s.", path.c_str()); | GELOGI("load modelData from file: %s.", path.c_str()); | ||||
| @@ -618,7 +681,7 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat | |||||
| const std::vector<uint32_t> &output_queue_ids) { | const std::vector<uint32_t> &output_queue_ids) { | ||||
| GELOGI("Load model with queue begin."); | GELOGI("Load model with queue begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | ||||
| @@ -638,7 +701,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||||
| ge::RunModelData &run_output_data, bool async_mode) { | ge::RunModelData &run_output_data, bool async_mode) { | ||||
| GELOGI("Execute model begin."); | GELOGI("Execute model begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -674,7 +737,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||||
| Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) { | Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) { | ||||
| GELOGI("Get memory and weight size from file begin."); | GELOGI("Get memory and weight size from file begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -707,7 +770,7 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size | |||||
| size_t &weight_size) { | size_t &weight_size) { | ||||
| GELOGI("Get memory and weight size from data begin."); | GELOGI("Get memory and weight size from data begin."); | ||||
| if (!isInit_) { | if (!isInit_) { | ||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
| return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
| } | } | ||||
| @@ -741,4 +804,56 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer | |||||
| Status GeExecutor::ReleaseSingleOpResource(void *stream) { | Status GeExecutor::ReleaseSingleOpResource(void *stream) { | ||||
| return SingleOpManager::GetInstance().ReleaseResource(stream); | return SingleOpManager::GetInstance().ReleaseResource(stream); | ||||
| } | } | ||||
| Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | |||||
| std::vector<std::vector<int64_t>> batch_info; | |||||
| Status ret = GetDynamicBatchInfo(model_id, batch_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Calc batch info size failed. ret = %d", ret); | |||||
| return ret; | |||||
| } | |||||
| if (batch_info.empty()) { | |||||
| shape_count = kStaticBatchInfoSize; | |||||
| } else { | |||||
| shape_count = batch_info.size(); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { | |||||
| GELOGI("Begin to GetOrigInputInfo."); | |||||
| if (!isInit_) { | |||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| return GE_EXEC_NOT_INIT; | |||||
| } | |||||
| Status ret = GraphExecutor::GetOrigInputInfo(model_id, index, orig_input_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetOrigInputInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("GetOrigInputInfo succ."); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||||
| std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims) { | |||||
| GELOGI("Begin to GetAllAippInputOutputDims."); | |||||
| if (!isInit_) { | |||||
| GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
| return GE_EXEC_NOT_INIT; | |||||
| } | |||||
| Status ret = GraphExecutor::GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetAllAippInputOutputDims failed."); | |||||
| return ret; | |||||
| } | |||||
| GELOGI("GetAllAippInputOutputDims succ."); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -0,0 +1,202 @@ | |||||
LOCAL_PATH := $(call my-dir)

# Sources shared by every libge_executor build variant (paths are relative to
# this makefile's directory, hence the leading ../).
local_ge_executor_src_files := \
    ge_executor.cc \
    ../common/profiling/profiling_manager.cc \
    ../common/ge/plugin_manager.cc \
    ../graph/load/graph_loader.cc \
    ../graph/execute/graph_execute.cc \
    ../omm/csa_interact.cc \
    ../graph/manager/graph_manager_utils.cc \
    ../graph/manager/graph_var_manager.cc \
    ../graph/manager/graph_mem_allocator.cc \
    ../graph/manager/graph_caching_allocator.cc \
    ../graph/manager/trans_var_data_utils.cc \
    ../graph/manager/util/debug.cc \
    ../model/ge_model.cc \
    ../model/ge_root_model.cc \
    ../graph/load/new_model_manager/davinci_model.cc \
    ../graph/load/new_model_manager/davinci_model_parser.cc \
    ../graph/load/new_model_manager/model_manager.cc \
    ../graph/load/new_model_manager/tbe_handle_store.cc \
    ../graph/load/new_model_manager/cpu_queue_schedule.cc \
    ../graph/load/new_model_manager/model_utils.cc \
    ../graph/load/new_model_manager/aipp_utils.cc \
    ../graph/load/new_model_manager/data_inputer.cc \
    ../graph/load/new_model_manager/data_dumper.cc \
    ../graph/load/new_model_manager/zero_copy_task.cc \
    ../graph/load/new_model_manager/task_info/task_info.cc \
    ../graph/load/new_model_manager/task_info/event_record_task_info.cc \
    ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \
    ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
    ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
    ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
    ../graph/load/new_model_manager/task_info/kernel_task_info.cc \
    ../graph/load/new_model_manager/task_info/label_set_task_info.cc \
    ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
    ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
    ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
    ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
    ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
    ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \
    ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
    ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
    ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \
    ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
    ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
    ../graph/load/output/output.cc \
    ../single_op/single_op_manager.cc \
    ../single_op/single_op_model.cc \
    ../single_op/single_op.cc \
    ../single_op/stream_resource.cc \
    ../single_op/task/op_task.cc \
    ../single_op/task/build_task_utils.cc \
    ../single_op/task/tbe_task_builder.cc \
    ../single_op/task/aicpu_task_builder.cc \
    ../hybrid/hybrid_davinci_model_stub.cc

# Proto definitions and header search paths shared by all variants.
local_ge_executor_c_include := \
    proto/insert_op.proto \
    proto/op_mapping_info.proto \
    proto/ge_ir.proto \
    proto/task.proto \
    proto/om.proto \
    $(TOPDIR)inc/external \
    $(TOPDIR)inc/external/graph \
    $(TOPDIR)inc/framework \
    $(TOPDIR)inc \
    $(LOCAL_PATH)/../ \
    $(TOPDIR)libc_sec/include \
    third_party/protobuf/include \
    third_party/json/include

# Shared libraries linked by the device shared-library variant.
local_ge_executor_shared_library := \
    libprotobuf \
    libc_sec \
    libge_common \
    libruntime \
    libslog \
    libmmpa \
    libgraph \
    libmsprof

local_ge_executor_ldflags := -lrt -ldl

#compile arm device dynamic lib
include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING
LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library)
ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
endif
include $(BUILD_SHARED_LIBRARY)

#compile x86 host dynamic lib
include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif
LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
LOCAL_SHARED_LIBRARIES := \
    libprotobuf \
    libc_sec \
    libge_common \
    libruntime \
    libslog \
    libmmpa \
    libgraph \
    libmsprof
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
include $(BUILD_HOST_SHARED_LIBRARY)

#compile for host static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif
LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
LOCAL_STATIC_LIBRARIES := \
    libge_common \
    libgraph \
    libprotobuf
LOCAL_SHARED_LIBRARIES := \
    libc_sec \
    libruntime \
    libslog \
    libmmpa \
    libmsprof
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
include $(BUILD_HOST_STATIC_LIBRARY)

#compile for device static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif
LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
LOCAL_STATIC_LIBRARIES := \
    libge_common \
    libgraph \
    libprotobuf
LOCAL_SHARED_LIBRARIES := \
    libc_sec \
    libruntime \
    libslog \
    libmmpa \
    libmsprof
ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
endif
include $(BUILD_STATIC_LIBRARY)
| @@ -0,0 +1,407 @@ | |||||
LOCAL_PATH := $(call my-dir)

# Sources shared by both libge_compiler variants (host and device).
COMMON_LOCAL_SRC_FILES := \
    proto/fusion_model.proto \
    proto/optimizer_priority.proto \
    graph/manager/trans_var_data_utils.cc \
    omm/csa_interact.cc \
    common/fp16_t.cc \
    common/formats/utils/formats_trans_utils.cc \
    common/formats/format_transfers/datatype_transfer.cc \
    common/formats/format_transfers/format_transfer_transpose.cc \
    common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
    common/formats/format_transfers/format_transfer_fractal_z.cc \
    common/formats/format_transfers/format_transfer_fractal_nz.cc \
    common/formats/format_transfers/format_transfer_fractal_zz.cc \
    common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
    common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
    common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
    common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
    common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
    common/formats/format_transfers/format_transfer_fracz_nchw.cc \
    common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
    common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
    common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
    common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
    common/formats/format_transfers/format_transfer_nchw_fz_c04.cc \
    common/formats/formats.cc \
    common/profiling/profiling_manager.cc \
    common/helper/model_cache_helper.cc \
    ge_local_engine/engine/host_cpu_engine.cc

# Session / graph-manager layer sources.
GRAPH_MANAGER_LOCAL_SRC_FILES := \
    common/ge/plugin_manager.cc\
    init/gelib.cc \
    session/inner_session.cc \
    session/session_manager.cc \
    engine_manager/dnnengine_manager.cc \
    opskernel_manager/ops_kernel_manager.cc \
    graph/manager/graph_manager.cc \
    graph/manager/graph_manager_utils.cc \
    graph/manager/graph_context.cc \
    graph/preprocess/graph_preprocess.cc \
    graph/preprocess/multi_batch_copy_graph.cc \
    graph/execute/graph_execute.cc \
    graph/load/graph_loader.cc \
    graph/optimize/graph_optimize.cc \
    graph/optimize/summary_optimize.cc \
    graph/build/graph_builder.cc \
    graph/partition/engine_place.cc \
    graph/partition/graph_partition.cc \
    graph/partition/dynamic_shape_partition.cc \
    generator/ge_generator.cc \
    generator/generator_api.cc \
    graph/manager/graph_var_manager.cc \
    graph/manager/graph_mem_allocator.cc \
    graph/manager/graph_caching_allocator.cc

# IR-build entry points.
# NOTE(review): "BUILER" looks like a typo for "BUILDER"; the name is kept
# because every use below must agree with the definition.
BUILER_SRC_FILES := \
    ir_build/ge_ir_build.cc \
    ir_build/atc_ir_common.cc

# Offline-model-generation (OMG) sources: graph passes and host kernels.
OMG_HOST_SRC_FILES := \
    model/ge_model.cc \
    model/ge_root_model.cc \
    graph/common/transop_util.cc \
    graph/passes/pass_manager.cc \
    graph/passes/resource_pair_add_control_pass.cc \
    graph/passes/resource_pair_remove_control_pass.cc \
    graph/passes/pass_utils.cc \
    graph/passes/base_pass.cc \
    graph/passes/constant_folding_pass.cc \
    graph/passes/aicpu_constant_folding_pass.cc \
    graph/passes/reshape_remove_pass.cc \
    graph/passes/reshape_recovery_pass.cc \
    graph/passes/transop_breadth_fusion_pass.cc \
    graph/passes/transop_depth_fusion_pass.cc \
    graph/passes/transop_nearby_allreduce_fusion_pass.cc \
    graph/passes/same_transdata_breadth_fusion_pass.cc \
    graph/passes/transop_without_reshape_fusion_pass.cc \
    graph/passes/compile_nodes_pass.cc \
    graph/passes/variable_prepare_op_pass.cc \
    graph/passes/variable_ref_delete_op_pass.cc \
    graph/passes/variable_ref_useless_control_out_delete_pass.cc \
    graph/passes/subgraph_pass.cc \
    graph/passes/data_pass.cc \
    graph/passes/net_output_pass.cc \
    graph/passes/replace_transshape_pass.cc \
    graph/passes/constant_fuse_same_pass.cc \
    graph/passes/print_op_pass.cc \
    graph/passes/no_use_reshape_remove_pass.cc \
    graph/passes/iterator_op_pass.cc \
    graph/passes/atomic_addr_clean_pass.cc \
    graph/common/omg_util.cc \
    graph/common/bcast.cc \
    graph/passes/dimension_compute_pass.cc \
    graph/passes/dimension_adjust_pass.cc \
    graph/passes/get_original_format_pass.cc \
    graph/passes/shape_operate_op_remove_pass.cc \
    graph/passes/unused_op_remove_pass.cc \
    graph/passes/assert_pass.cc \
    graph/passes/dropout_pass.cc \
    graph/passes/infershape_pass.cc \
    graph/passes/unused_const_pass.cc \
    graph/passes/isolated_op_remove_pass.cc \
    graph/passes/permute_pass.cc \
    graph/passes/ctrl_edge_transfer_pass.cc \
    host_kernels/broadcast_gradient_args_kernel.cc \
    host_kernels/greater_kernel.cc \
    host_kernels/gather_v2_kernel.cc \
    host_kernels/maximum_kernel.cc \
    host_kernels/floormod_kernel.cc \
    host_kernels/floordiv_kernel.cc \
    host_kernels/range_kernel.cc \
    host_kernels/shape_kernel.cc \
    host_kernels/size_kernel.cc \
    host_kernels/shape_n_kernel.cc \
    host_kernels/rank_kernel.cc \
    host_kernels/broadcast_args_kernel.cc \
    host_kernels/fill_kernel.cc \
    host_kernels/empty_kernel.cc \
    host_kernels/expanddims_kernel.cc \
    host_kernels/reshape_kernel.cc \
    host_kernels/squeeze_kernel.cc \
    host_kernels/unsqueeze_kernel.cc \
    host_kernels/kernel_utils.cc \
    host_kernels/cast_kernel.cc \
    host_kernels/transdata_kernel.cc \
    host_kernels/unpack_kernel.cc \
    host_kernels/transpose_kernel.cc \
    host_kernels/permute_kernel.cc \
    host_kernels/pack_kernel.cc \
    host_kernels/concat_v2_kernel.cc \
    host_kernels/concat_offset_kernel.cc \
    host_kernels/strided_slice_kernel.cc \
    host_kernels/ssd_prior_box_kernel.cc \
    host_kernels/add_kernel.cc \
    host_kernels/sub_kernel.cc \
    host_kernels/mul_kernel.cc \
    host_kernels/reduce_prod_kernel.cc \
    host_kernels/rsqrt_kernel.cc \
    host_kernels/slice_kernel.cc \
    host_kernels/slice_d_kernel.cc \
    host_kernels/dynamic_stitch_kernel.cc \
    graph/passes/stop_gradient_pass.cc \
    graph/passes/prevent_gradient_pass.cc \
    graph/passes/identity_pass.cc \
    graph/passes/placeholder_with_default_pass.cc \
    graph/passes/snapshot_pass.cc \
    graph/passes/guarantee_const_pass.cc \
    graph/passes/var_is_initialized_op_pass.cc \
    graph/passes/parallel_concat_start_op_pass.cc \
    graph/passes/folding_pass.cc \
    graph/passes/cast_translate_pass.cc \
    graph/passes/prune_pass.cc \
    graph/passes/switch_op_pass.cc \
    graph/passes/multi_batch_pass.cc \
    graph/passes/next_iteration_pass.cc \
    graph/passes/control_trigger_pass.cc \
    graph/passes/cond_pass.cc \
    graph/passes/cond_remove_pass.cc \
    graph/passes/for_pass.cc \
    graph/passes/enter_pass.cc \
    graph/passes/addn_pass.cc \
    graph/passes/common_subexpression_elimination_pass.cc \
    graph/passes/transop_symmetry_elimination_pass.cc \
    graph/passes/save_pass.cc \
    graph/passes/switch_dead_branch_elimination.cc \
    graph/passes/switch_logic_remove_pass.cc \
    graph/passes/switch_data_edges_bypass.cc \
    graph/passes/merge_pass.cc \
    graph/passes/variable_format_pass.cc \
    graph/passes/variable_op_pass.cc \
    graph/passes/cast_remove_pass.cc \
    graph/passes/transpose_transdata_pass.cc \
    graph/passes/identify_reference_pass.cc \
    graph/passes/hccl_memcpy_pass.cc \
    graph/passes/flow_ctrl_pass.cc \
    graph/passes/link_gen_mask_nodes_pass.cc \
    graph/passes/replace_with_empty_const_pass.cc \
    graph/passes/hccl_group_pass.cc \
    graph/passes/switch_fusion_pass.cc \
    graph/passes/switch_split_pass.cc

# Device build reuses the host OMG source list.
OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES)

# Offline-model-execution (OME) sources: model loading and task generation.
OME_HOST_SRC_FILES := \
    graph/manager/model_manager/event_manager.cc \
    graph/manager/util/rt_context_util.cc \
    graph/manager/util/variable_accelerate_ctrl.cc \
    graph/manager/util/debug.cc \
    graph/load/new_model_manager/model_manager.cc \
    graph/load/new_model_manager/data_inputer.cc \
    graph/load/new_model_manager/davinci_model.cc \
    graph/load/new_model_manager/davinci_model_parser.cc \
    graph/load/new_model_manager/model_utils.cc \
    graph/load/new_model_manager/aipp_utils.cc \
    graph/load/new_model_manager/tbe_handle_store.cc \
    graph/load/new_model_manager/cpu_queue_schedule.cc \
    graph/load/new_model_manager/zero_copy_task.cc \
    graph/load/output/output.cc \
    graph/load/new_model_manager/data_dumper.cc \
    graph/load/new_model_manager/task_info/task_info.cc \
    graph/load/new_model_manager/task_info/event_record_task_info.cc \
    graph/load/new_model_manager/task_info/event_wait_task_info.cc \
    graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
    graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
    graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
    graph/load/new_model_manager/task_info/kernel_task_info.cc \
    graph/load/new_model_manager/task_info/label_set_task_info.cc \
    graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
    graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
    graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
    graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
    graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
    graph/load/new_model_manager/task_info/stream_active_task_info.cc \
    graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
    graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
    graph/load/new_model_manager/task_info/end_graph_task_info.cc \
    graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
    graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
    single_op/task/op_task.cc \
    single_op/task/build_task_utils.cc \
    single_op/task/tbe_task_builder.cc \
    single_op/task/aicpu_task_builder.cc \
    single_op/single_op.cc \
    single_op/single_op_model.cc \
    single_op/stream_resource.cc \
    single_op/single_op_manager.cc \
    hybrid/hybrid_davinci_model_stub.cc \
#    graph/load/new_model_manager/task_info/hccl_task_info.cc

OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES)

# Proto definitions and include paths for the host build.
COMMON_LOCAL_C_INCLUDES := \
    proto/om.proto \
    proto/task.proto \
    proto/insert_op.proto \
    proto/ge_ir.proto \
    proto/fwk_adapter.proto \
    proto/op_mapping_info.proto \
    proto/tensorflow/attr_value.proto \
    proto/tensorflow/function.proto \
    proto/tensorflow/graph.proto \
    proto/tensorflow/node_def.proto \
    proto/tensorflow/op_def.proto \
    proto/tensorflow/resource_handle.proto \
    proto/tensorflow/tensor.proto \
    proto/tensorflow/tensor_shape.proto \
    proto/tensorflow/types.proto \
    proto/tensorflow/versions.proto \
    $(LOCAL_PATH) ./ \
    $(TOPDIR)inc \
    $(TOPDIR)inc/external \
    $(TOPDIR)inc/external/graph \
    $(TOPDIR)inc/framework \
    $(TOPDIR)inc/framework/common \
    $(TOPDIR)inc/common \
    $(TOPDIR)inc/runtime \
    $(TOPDIR)libc_sec/include \
    $(TOPDIR)ops/built-in/op_proto/inc \
    third_party/json/include \
    third_party/protobuf/include \
    third_party/opencv/include

# Sources appended to both OME host and OMG device lists below.
NEW_OMG_HOST_SRC_FILES := \
    graph/preprocess/insert_op/util_insert_aipp_op.cc \
    graph/preprocess/insert_op/ge_aipp_op.cc \
    graph/build/model_builder.cc \
    graph/build/task_generator.cc \
    graph/build/stream_allocator.cc \
    graph/build/logical_stream_allocator.cc \
    graph/build/stream_graph_optimizer.cc \
    graph/build/run_context.cc \
    graph/build/label_allocator.cc \
    graph/label/label_maker.cc \
    graph/label/if_label_maker.cc \
    graph/label/case_label_maker.cc \
    graph/label/while_label_maker.cc \
    graph/label/partitioned_call_label_maker.cc

OME_HOST_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)
OMG_DEVICE_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)

# Proto definitions and include paths for the device build.
DEVICE_LOCAL_C_INCLUDES := \
    proto/om.proto \
    proto/task.proto \
    proto/insert_op.proto \
    proto/ge_ir.proto \
    proto/fwk_adapter.proto \
    proto/op_mapping_info.proto \
    proto/tensorflow/attr_value.proto \
    proto/tensorflow/function.proto \
    proto/tensorflow/graph.proto \
    proto/tensorflow/node_def.proto \
    proto/tensorflow/op_def.proto \
    proto/tensorflow/resource_handle.proto \
    proto/tensorflow/tensor.proto \
    proto/tensorflow/tensor_shape.proto \
    proto/tensorflow/types.proto \
    proto/tensorflow/versions.proto \
    $(LOCAL_PATH) ./ \
    $(TOPDIR)inc \
    $(TOPDIR)libc_sec/include \
    $(TOPDIR)inc/external \
    $(TOPDIR)inc/external/graph \
    $(TOPDIR)inc/common/util \
    $(TOPDIR)inc/framework \
    $(TOPDIR)inc/framework/common \
    $(TOPDIR)inc/runtime \
    $(TOPDIR)ops/built-in/op_proto/inc \
    $(TOPDIR)framework/domi \
    third_party/json/include \
    third_party/protobuf/include \
    third_party/opencv/include

#compiler for host infer
include $(CLEAR_VARS)
LOCAL_MODULE := libge_compiler
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
# from ome_inference.mk
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(OME_HOST_SRC_FILES)
# NOTE(review): NEW_OME_DEVICE_SRC_FILES is never defined in this makefile,
# so this line expands to nothing -- confirm whether OME_DEVICE_SRC_FILES
# (or nothing) was intended.
LOCAL_SRC_FILES += $(NEW_OME_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(BUILER_SRC_FILES)
LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
    libc_sec \
    libprotobuf \
    libslog \
    libmmpa \
    libgraph \
    libregister \
    libge_common \
    libruntime_compile \
    libresource \
    liberror_manager

LOCAL_LDFLAGS := -lrt -ldl
include $(BUILD_HOST_SHARED_LIBRARY)

#compiler for device
include $(CLEAR_VARS)
LOCAL_MODULE := libge_compiler
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION
LOCAL_CFLAGS += -O2
LOCAL_MODULE_CLASS := SHARED_LIBRARIES
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(OME_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(BUILER_SRC_FILES)
LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)
LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
    libc_sec \
    libprotobuf \
    libslog \
    libmmpa \
    libgraph \
    libregister \
    libresource \
    libruntime_compile \
    libge_common

ifeq ($(device_os),android)
LOCAL_LDFLAGS := -ldl
else
LOCAL_LDFLAGS := -lrt -ldl
endif
LOCAL_CFLAGS += \
    -Wall
ifeq ($(device_os),android)
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
endif
include $(BUILD_SHARED_LIBRARY)
| @@ -124,7 +124,7 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector<G | |||||
| Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_kernel, | Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_kernel, | ||||
| map<std::string, const Tensor> &named_inputs, | map<std::string, const Tensor> &named_inputs, | ||||
| map<std::string, Tensor> &named_outputs) { | map<std::string, Tensor> &named_outputs) { | ||||
| GELOGD("To run host cpu op: %s", op_desc->GetName().c_str()); | |||||
| GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str()); | |||||
| Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc); | Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc); | ||||
| auto ret = op_kernel.Compute(op, named_inputs, named_outputs); | auto ret = op_kernel.Compute(op, named_inputs, named_outputs); | ||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| @@ -139,7 +139,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
| GELOGD("To run node by host cpu engine. node name = %s", node->GetName().c_str()); | |||||
| GELOGD("Run node by host cpu engine. node name = %s", node->GetName().c_str()); | |||||
| std::unique_ptr<HostCpuOp> op_kernel; | std::unique_ptr<HostCpuOp> op_kernel; | ||||
| GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel)); | GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel)); | ||||
| @@ -151,7 +151,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, | |||||
| GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, tmp_outputs, named_outputs)); | GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, tmp_outputs, named_outputs)); | ||||
| GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs)); | GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs)); | ||||
| GELOGD("Ran node by host cpu engine successfully. name node = %s", node->GetName().c_str()); | |||||
| GELOGD("Run node by host cpu engine successfully. name node = %s", node->GetName().c_str()); | |||||
| outputs.swap(tmp_outputs); | outputs.swap(tmp_outputs); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -0,0 +1,59 @@ | |||||
LOCAL_PATH := $(call my-dir)

# Sources shared by both ge_local_engine variants.
local_lib_src_files := engine/ge_local_engine.cc \
    ops_kernel_store/ge_local_ops_kernel_info.cc \
    ops_kernel_store/op/op_factory.cc \
    ops_kernel_store/op/op.cc \
    ops_kernel_store/op/ge_deleted_op.cc \
    ops_kernel_store/op/no_op.cc

# Proto definitions and header search paths.
local_lib_inc_path := proto/task.proto \
    ${LOCAL_PATH} \
    ${TOPDIR}inc \
    ${TOPDIR}inc/external \
    ${TOPDIR}inc/external/graph \
    $(TOPDIR)libc_sec/include \
    ${TOPDIR}third_party/protobuf/include \
    ${TOPDIR}inc/framework \
    $(TOPDIR)framework/domi

#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
    libc_sec \
    libslog \
    libgraph \
    libregister \
    libruntime
LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for atc
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
# The atc variant links the compile-time runtime instead of libruntime.
LOCAL_SHARED_LIBRARIES := libprotobuf \
    libc_sec \
    libslog \
    libgraph \
    libregister \
    libruntime_compile
LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
include ${BUILD_HOST_SHARED_LIBRARY}
| @@ -81,7 +81,7 @@ Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { | |||||
| const string node_name = ge_node.GetName(); | const string node_name = ge_node.GetName(); | ||||
| const string node_type = ge_node.GetType(); | const string node_type = ge_node.GetType(); | ||||
| size_t output_size = op_desc->GetOutputsSize(); | size_t output_size = op_desc->GetOutputsSize(); | ||||
| GELOGD("Calc op[%s:%s] op running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size); | |||||
| GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size); | |||||
| for (size_t i = 0; i < output_size; ++i) { | for (size_t i = 0; i < output_size; ++i) { | ||||
| GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast<uint32_t>(i)); | GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast<uint32_t>(i)); | ||||
| @@ -24,7 +24,7 @@ namespace ge_local { | |||||
| NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | ||||
| Status NoOp::Run() { | Status NoOp::Run() { | ||||
| GELOGI("Node:%s type is %s, no need gen task.", name_.c_str(), type_.c_str()); | |||||
| GELOGI("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str()); | |||||
| // Do nothing | // Do nothing | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -0,0 +1,429 @@ | |||||
| LOCAL_PATH := $(call my-dir) | |||||
| LIBGE_LOCAL_SRC_FILES := \ | |||||
| proto/fusion_model.proto \ | |||||
| proto/optimizer_priority.proto \ | |||||
| common/formats/format_transfers/datatype_transfer.cc \ | |||||
| common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \ | |||||
| common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \ | |||||
| common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \ | |||||
| common/formats/format_transfers/format_transfer_fractal_nz.cc \ | |||||
| common/formats/format_transfers/format_transfer_fractal_z.cc \ | |||||
| common/formats/format_transfers/format_transfer_fractal_zz.cc \ | |||||
| common/formats/format_transfers/format_transfer_fracz_hwcn.cc \ | |||||
| common/formats/format_transfers/format_transfer_fracz_nchw.cc \ | |||||
| common/formats/format_transfers/format_transfer_fracz_nhwc.cc \ | |||||
| common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \ | |||||
| common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \ | |||||
| common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \ | |||||
| common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ | |||||
| common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \ | |||||
| common/formats/format_transfers/format_transfer_transpose.cc \ | |||||
| common/formats/formats.cc \ | |||||
| common/formats/utils/formats_trans_utils.cc \ | |||||
| common/fp16_t.cc \ | |||||
| common/ge/plugin_manager.cc\ | |||||
| common/helper/model_cache_helper.cc \ | |||||
| common/profiling/profiling_manager.cc \ | |||||
| engine_manager/dnnengine_manager.cc \ | |||||
| ge_local_engine/engine/host_cpu_engine.cc \ | |||||
| generator/ge_generator.cc \ | |||||
| generator/generator_api.cc \ | |||||
| graph/build/graph_builder.cc \ | |||||
| graph/build/label_allocator.cc \ | |||||
| graph/build/logical_stream_allocator.cc \ | |||||
| graph/build/model_builder.cc \ | |||||
| graph/build/run_context.cc \ | |||||
| graph/build/stream_allocator.cc \ | |||||
| graph/build/stream_graph_optimizer.cc \ | |||||
| graph/build/task_generator.cc \ | |||||
| graph/common/bcast.cc \ | |||||
| graph/common/omg_util.cc \ | |||||
| graph/common/transop_util.cc \ | |||||
| graph/execute/graph_execute.cc \ | |||||
| graph/label/case_label_maker.cc \ | |||||
| graph/label/if_label_maker.cc \ | |||||
| graph/label/label_maker.cc \ | |||||
| graph/label/partitioned_call_label_maker.cc \ | |||||
| graph/label/while_label_maker.cc \ | |||||
| graph/load/graph_loader.cc \ | |||||
| graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||||
| graph/load/new_model_manager/data_dumper.cc \ | |||||
| graph/load/new_model_manager/data_inputer.cc \ | |||||
| graph/load/new_model_manager/davinci_model.cc \ | |||||
| graph/load/new_model_manager/davinci_model_parser.cc \ | |||||
| graph/load/new_model_manager/model_manager.cc \ | |||||
| graph/load/new_model_manager/model_utils.cc \ | |||||
| graph/load/new_model_manager/aipp_utils.cc \ | |||||
| graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/hccl_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||||
| graph/load/new_model_manager/task_info/task_info.cc \ | |||||
| graph/load/new_model_manager/tbe_handle_store.cc \ | |||||
| graph/load/new_model_manager/zero_copy_task.cc \ | |||||
| graph/load/output/output.cc \ | |||||
| graph/manager/graph_context.cc \ | |||||
| graph/manager/graph_manager.cc \ | |||||
| graph/manager/graph_manager_utils.cc \ | |||||
| graph/manager/graph_mem_allocator.cc \ | |||||
| graph/manager/graph_caching_allocator.cc \ | |||||
| graph/manager/graph_var_manager.cc \ | |||||
| graph/manager/model_manager/event_manager.cc \ | |||||
| graph/manager/trans_var_data_utils.cc \ | |||||
| graph/manager/util/debug.cc \ | |||||
| graph/manager/util/hcom_util.cc \ | |||||
| graph/manager/util/rt_context_util.cc \ | |||||
| graph/manager/util/variable_accelerate_ctrl.cc \ | |||||
| graph/optimize/graph_optimize.cc \ | |||||
| graph/optimize/optimizer/allreduce_fusion_pass.cc \ | |||||
| graph/optimize/summary_optimize.cc \ | |||||
| graph/partition/engine_place.cc \ | |||||
| graph/partition/graph_partition.cc \ | |||||
| graph/passes/addn_pass.cc \ | |||||
| graph/passes/aicpu_constant_folding_pass.cc \ | |||||
| graph/passes/assert_pass.cc \ | |||||
| graph/passes/atomic_addr_clean_pass.cc \ | |||||
| graph/partition/dynamic_shape_partition.cc \ | |||||
| graph/passes/base_pass.cc \ | |||||
| graph/passes/cast_remove_pass.cc \ | |||||
| graph/passes/cast_translate_pass.cc \ | |||||
| graph/passes/common_subexpression_elimination_pass.cc \ | |||||
| graph/passes/transop_symmetry_elimination_pass.cc \ | |||||
| graph/passes/compile_nodes_pass.cc \ | |||||
| graph/passes/constant_folding_pass.cc \ | |||||
| graph/passes/constant_fuse_same_pass.cc \ | |||||
| graph/passes/control_trigger_pass.cc \ | |||||
| graph/passes/dimension_adjust_pass.cc \ | |||||
| graph/passes/dimension_compute_pass.cc \ | |||||
| graph/passes/dropout_pass.cc \ | |||||
| graph/passes/hccl_group_pass.cc \ | |||||
| graph/passes/switch_fusion_pass.cc \ | |||||
| graph/passes/switch_split_pass.cc \ | |||||
| graph/passes/enter_pass.cc \ | |||||
| graph/passes/flow_ctrl_pass.cc \ | |||||
| host_kernels/transpose_kernel.cc \ | |||||
| host_kernels/add_kernel.cc \ | |||||
| host_kernels/broadcast_args_kernel.cc \ | |||||
| host_kernels/broadcast_gradient_args_kernel.cc \ | |||||
| host_kernels/cast_kernel.cc \ | |||||
| host_kernels/concat_offset_kernel.cc \ | |||||
| host_kernels/concat_v2_kernel.cc \ | |||||
| host_kernels/dynamic_stitch_kernel.cc \ | |||||
| host_kernels/empty_kernel.cc \ | |||||
| host_kernels/expanddims_kernel.cc \ | |||||
| host_kernels/fill_kernel.cc \ | |||||
| host_kernels/floordiv_kernel.cc \ | |||||
| host_kernels/floormod_kernel.cc \ | |||||
| host_kernels/gather_v2_kernel.cc \ | |||||
| host_kernels/greater_kernel.cc \ | |||||
| host_kernels/kernel_utils.cc \ | |||||
| host_kernels/maximum_kernel.cc \ | |||||
| host_kernels/mul_kernel.cc \ | |||||
| host_kernels/pack_kernel.cc \ | |||||
| host_kernels/permute_kernel.cc \ | |||||
| host_kernels/range_kernel.cc \ | |||||
| host_kernels/rank_kernel.cc \ | |||||
| host_kernels/reduce_prod_kernel.cc \ | |||||
| host_kernels/reshape_kernel.cc \ | |||||
| host_kernels/rsqrt_kernel.cc \ | |||||
| host_kernels/shape_kernel.cc \ | |||||
| host_kernels/shape_n_kernel.cc \ | |||||
| host_kernels/size_kernel.cc \ | |||||
| host_kernels/slice_d_kernel.cc \ | |||||
| host_kernels/slice_kernel.cc \ | |||||
| host_kernels/squeeze_kernel.cc \ | |||||
| host_kernels/unsqueeze_kernel.cc \ | |||||
| host_kernels/ssd_prior_box_kernel.cc \ | |||||
| host_kernels/strided_slice_kernel.cc \ | |||||
| host_kernels/sub_kernel.cc \ | |||||
| host_kernels/transdata_kernel.cc \ | |||||
| host_kernels/unpack_kernel.cc \ | |||||
| graph/passes/folding_pass.cc \ | |||||
| graph/passes/get_original_format_pass.cc \ | |||||
| graph/passes/guarantee_const_pass.cc \ | |||||
| graph/passes/hccl_memcpy_pass.cc \ | |||||
| graph/passes/identify_reference_pass.cc \ | |||||
| graph/passes/identity_pass.cc \ | |||||
| graph/passes/infershape_pass.cc \ | |||||
| graph/passes/isolated_op_remove_pass.cc \ | |||||
| graph/passes/iterator_op_pass.cc \ | |||||
| graph/passes/link_gen_mask_nodes_pass.cc \ | |||||
| graph/passes/merge_pass.cc \ | |||||
| graph/passes/multi_batch_pass.cc \ | |||||
| graph/passes/net_output_pass.cc \ | |||||
| graph/passes/next_iteration_pass.cc \ | |||||
| graph/passes/no_use_reshape_remove_pass.cc \ | |||||
| graph/passes/pass_manager.cc \ | |||||
| graph/passes/pass_utils.cc \ | |||||
| graph/passes/permute_pass.cc \ | |||||
| graph/passes/placeholder_with_default_pass.cc \ | |||||
| graph/passes/prevent_gradient_pass.cc \ | |||||
| graph/passes/print_op_pass.cc \ | |||||
| graph/passes/prune_pass.cc \ | |||||
| graph/passes/ctrl_edge_transfer_pass.cc \ | |||||
| graph/passes/replace_with_empty_const_pass.cc \ | |||||
| graph/passes/reshape_remove_pass.cc \ | |||||
| graph/passes/reshape_recovery_pass.cc \ | |||||
| graph/passes/resource_pair_add_control_pass.cc \ | |||||
| graph/passes/resource_pair_remove_control_pass.cc \ | |||||
| graph/passes/same_transdata_breadth_fusion_pass.cc \ | |||||
| graph/passes/save_pass.cc \ | |||||
| graph/passes/shape_operate_op_remove_pass.cc \ | |||||
| graph/passes/snapshot_pass.cc \ | |||||
| graph/passes/stop_gradient_pass.cc \ | |||||
| graph/passes/subgraph_pass.cc \ | |||||
| graph/passes/data_pass.cc \ | |||||
| graph/passes/switch_data_edges_bypass.cc \ | |||||
| graph/passes/switch_logic_remove_pass.cc \ | |||||
| graph/passes/switch_op_pass.cc \ | |||||
| graph/passes/switch_dead_branch_elimination.cc \ | |||||
| graph/passes/replace_transshape_pass.cc \ | |||||
| graph/passes/transop_breadth_fusion_pass.cc \ | |||||
| graph/passes/transop_depth_fusion_pass.cc \ | |||||
| graph/passes/transop_nearby_allreduce_fusion_pass.cc \ | |||||
| graph/passes/transop_without_reshape_fusion_pass.cc \ | |||||
| graph/passes/transpose_transdata_pass.cc \ | |||||
| graph/passes/unused_const_pass.cc \ | |||||
| graph/passes/unused_op_remove_pass.cc \ | |||||
| graph/passes/var_is_initialized_op_pass.cc \ | |||||
| graph/passes/parallel_concat_start_op_pass.cc \ | |||||
| graph/passes/cond_pass.cc \ | |||||
| graph/passes/cond_remove_pass.cc \ | |||||
| graph/passes/for_pass.cc \ | |||||
| graph/passes/variable_format_pass.cc \ | |||||
| graph/passes/variable_op_pass.cc \ | |||||
| graph/passes/variable_prepare_op_pass.cc \ | |||||
| graph/passes/variable_ref_delete_op_pass.cc \ | |||||
| graph/passes/variable_ref_useless_control_out_delete_pass.cc \ | |||||
| graph/preprocess/graph_preprocess.cc \ | |||||
| graph/preprocess/insert_op/ge_aipp_op.cc \ | |||||
| graph/preprocess/insert_op/util_insert_aipp_op.cc \ | |||||
| graph/preprocess/multi_batch_copy_graph.cc \ | |||||
| init/gelib.cc \ | |||||
| model/ge_model.cc \ | |||||
| model/ge_root_model.cc \ | |||||
| omm/csa_interact.cc \ | |||||
| opskernel_manager/ops_kernel_manager.cc \ | |||||
| session/inner_session.cc \ | |||||
| session/session_manager.cc \ | |||||
| single_op/single_op.cc \ | |||||
| single_op/single_op_manager.cc \ | |||||
| single_op/single_op_model.cc \ | |||||
| single_op/stream_resource.cc \ | |||||
| single_op/task/build_task_utils.cc \ | |||||
| single_op/task/op_task.cc \ | |||||
| single_op/task/tbe_task_builder.cc \ | |||||
| single_op/task/aicpu_task_builder.cc \ | |||||
| hybrid/common/tensor_value.cc \ | |||||
| hybrid/common/npu_memory_allocator.cc \ | |||||
| hybrid/executor/rt_callback_manager.cc \ | |||||
| hybrid/executor/node_state.cc \ | |||||
| hybrid/executor/node_done_manager.cc \ | |||||
| hybrid/executor/hybrid_profiler.cc \ | |||||
| hybrid/executor/hybrid_model_executor.cc \ | |||||
| hybrid/executor/hybrid_model_async_executor.cc \ | |||||
| hybrid/executor/hybrid_execution_context.cc \ | |||||
| hybrid/executor/worker/task_compile_engine.cc \ | |||||
| hybrid/executor/worker/shape_inference_engine.cc \ | |||||
| hybrid/executor/worker/execution_engine.cc \ | |||||
| hybrid/model/hybrid_model.cc \ | |||||
| hybrid/model/hybrid_model_builder.cc \ | |||||
| hybrid/model/node_item.cc \ | |||||
| hybrid/node_executor/aicore/aicore_node_executor.cc \ | |||||
| hybrid/node_executor/aicore/aicore_op_task.cc \ | |||||
| hybrid/node_executor/aicore/aicore_task_builder.cc \ | |||||
| hybrid/node_executor/aicore/aicore_task_compiler.cc \ | |||||
| hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | |||||
| hybrid/node_executor/aicpu/aicpu_node_executor.cc \ | |||||
| hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ | |||||
| hybrid/node_executor/hostcpu/ge_local_node_executor.cc \ | |||||
| hybrid/node_executor/node_executor.cc \ | |||||
| hybrid/node_executor/task_context.cc \ | |||||
| hybrid/hybrid_davinci_model.cc \ | |||||
| executor/ge_executor.cc \ | |||||
| LIBCLIENT_LOCAL_SRC_FILES := \ | |||||
| proto/ge_api.proto \ | |||||
| client/ge_api.cc \ | |||||
| RUNNER_LOCAL_C_INCLUDES := \ | |||||
| $(LOCAL_PATH) ./ \ | |||||
| $(LOCAL_PATH)/../ \ | |||||
| $(LOCAL_PATH)/../../ \ | |||||
| $(TOPDIR)inc \ | |||||
| $(TOPDIR)inc/common \ | |||||
| $(TOPDIR)inc/external \ | |||||
| $(TOPDIR)inc/external/graph \ | |||||
| $(TOPDIR)inc/framework \ | |||||
| $(TOPDIR)inc/framework/common \ | |||||
| $(TOPDIR)inc/graph \ | |||||
| $(TOPDIR)inc/runtime \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| $(TOPDIR)ops/built-in/op_proto/inc \ | |||||
| proto/fwk_adapter.proto \ | |||||
| proto/ge_ir.proto \ | |||||
| proto/insert_op.proto \ | |||||
| proto/om.proto \ | |||||
| proto/op_mapping_info.proto \ | |||||
| proto/task.proto \ | |||||
| proto/tensorflow/attr_value.proto \ | |||||
| proto/tensorflow/function.proto \ | |||||
| proto/tensorflow/graph.proto \ | |||||
| proto/tensorflow/node_def.proto \ | |||||
| proto/tensorflow/op_def.proto \ | |||||
| proto/tensorflow/resource_handle.proto \ | |||||
| proto/tensorflow/tensor.proto \ | |||||
| proto/tensorflow/tensor_shape.proto \ | |||||
| proto/tensorflow/types.proto \ | |||||
| proto/tensorflow/versions.proto \ | |||||
| third_party/json/include \ | |||||
| third_party/opencv/include \ | |||||
| third_party/protobuf/include \ | |||||
| #compiler for GeRunner | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_runner | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||||
| LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD | |||||
| ifeq ($(DEBUG), 1) | |||||
| LOCAL_CFLAGS += -g -O0 | |||||
| endif | |||||
| LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libprotobuf \ | |||||
| libslog \ | |||||
| libmmpa \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| libge_common \ | |||||
| libhccl \ | |||||
| libmsprof \ | |||||
| liberror_manager \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_SHARED_LIBRARIES += \ | |||||
| libruntime \ | |||||
| libresource \ | |||||
| include $(BUILD_HOST_SHARED_LIBRARY) | |||||
| # add engine_conf.json to host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := engine_conf.json | |||||
| LOCAL_SRC_FILES := engine_manager/engine_conf.json | |||||
| LOCAL_MODULE_CLASS := ETC | |||||
| LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json | |||||
| include $(BUILD_HOST_PREBUILT) | |||||
| # add optimizer_priority.pbtxt to host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := optimizer_priority.pbtxt | |||||
| LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt | |||||
| LOCAL_MODULE_CLASS := ETC | |||||
| LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt | |||||
| include $(BUILD_HOST_PREBUILT) | |||||
| #compiler for GeRunner static lib | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_runner | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||||
| LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD | |||||
| LOCAL_CFLAGS += -g -O0 | |||||
| LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libmmpa \ | |||||
| libhccl \ | |||||
| libmsprof \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_SHARED_LIBRARIES += \ | |||||
| libruntime \ | |||||
| libresource \ | |||||
| include $(BUILD_HOST_STATIC_LIBRARY) | |||||
| #compiler for GeRunner static lib device | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_runner | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||||
| LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD | |||||
| LOCAL_CFLAGS += -g -O0 | |||||
| LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libmmpa \ | |||||
| libhccl \ | |||||
| libmsprof \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_SHARED_LIBRARIES += \ | |||||
| libruntime \ | |||||
| libresource \ | |||||
| include $(BUILD_STATIC_LIBRARY) | |||||
| @@ -0,0 +1,333 @@ | |||||
| LOCAL_PATH := $(call my-dir) | |||||
| COMMON_LOCAL_SRC_FILES := \ | |||||
| proto/fusion_model.proto \ | |||||
| proto/optimizer_priority.proto \ | |||||
| session/inner_session.cc \ | |||||
| session/session_manager.cc \ | |||||
| common/ge/plugin_manager.cc\ | |||||
| common/fp16_t.cc \ | |||||
| common/formats/utils/formats_trans_utils.cc \ | |||||
| common/formats/format_transfers/datatype_transfer.cc \ | |||||
| common/formats/format_transfers/format_transfer_transpose.cc \ | |||||
| common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ | |||||
| common/formats/format_transfers/format_transfer_fractal_z.cc \ | |||||
| common/formats/format_transfers/format_transfer_fractal_nz.cc \ | |||||
| common/formats/format_transfers/format_transfer_fractal_zz.cc \ | |||||
| common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \ | |||||
| common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \ | |||||
| common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \ | |||||
| common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \ | |||||
| common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \ | |||||
| common/formats/format_transfers/format_transfer_fracz_nchw.cc \ | |||||
| common/formats/format_transfers/format_transfer_fracz_nhwc.cc \ | |||||
| common/formats/format_transfers/format_transfer_fracz_hwcn.cc \ | |||||
| common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \ | |||||
| common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \ | |||||
| common/formats/formats.cc \ | |||||
| init/gelib.cc \ | |||||
| engine_manager/dnnengine_manager.cc \ | |||||
| opskernel_manager/ops_kernel_manager.cc \ | |||||
| graph/manager/graph_manager.cc \ | |||||
| graph/manager/graph_manager_utils.cc \ | |||||
| graph/manager/graph_context.cc \ | |||||
| graph/preprocess/graph_preprocess.cc \ | |||||
| graph/preprocess/multi_batch_copy_graph.cc \ | |||||
| graph/execute/graph_execute.cc \ | |||||
| graph/load/graph_loader.cc \ | |||||
| graph/optimize/graph_optimize.cc \ | |||||
| graph/passes/folding_pass.cc \ | |||||
| graph/optimize/summary_optimize.cc \ | |||||
| graph/build/graph_builder.cc \ | |||||
| graph/partition/engine_place.cc \ | |||||
| graph/partition/graph_partition.cc \ | |||||
| graph/partition/dynamic_shape_partition.cc \ | |||||
| generator/ge_generator.cc \ | |||||
| generator/generator_api.cc \ | |||||
| common/profiling/profiling_manager.cc \ | |||||
| ge_local_engine/engine/host_cpu_engine.cc \ | |||||
| common/helper/model_cache_helper.cc \ | |||||
| OMG_HOST_SRC_FILES := \ | |||||
| model/ge_model.cc \ | |||||
| model/ge_root_model.cc \ | |||||
| graph/common/transop_util.cc \ | |||||
| graph/manager/graph_var_manager.cc \ | |||||
| graph/manager/trans_var_data_utils.cc \ | |||||
| omm/csa_interact.cc \ | |||||
| graph/passes/pass_manager.cc \ | |||||
| graph/passes/pass_utils.cc \ | |||||
| graph/passes/base_pass.cc \ | |||||
| graph/passes/resource_pair_add_control_pass.cc \ | |||||
| graph/passes/resource_pair_remove_control_pass.cc \ | |||||
| graph/passes/constant_folding_pass.cc \ | |||||
| graph/passes/aicpu_constant_folding_pass.cc \ | |||||
| graph/passes/reshape_remove_pass.cc \ | |||||
| graph/passes/reshape_recovery_pass.cc \ | |||||
| graph/passes/transop_breadth_fusion_pass.cc \ | |||||
| graph/passes/transop_depth_fusion_pass.cc \ | |||||
| graph/passes/same_transdata_breadth_fusion_pass.cc \ | |||||
| graph/passes/transop_without_reshape_fusion_pass.cc \ | |||||
| graph/passes/compile_nodes_pass.cc \ | |||||
| graph/passes/transop_nearby_allreduce_fusion_pass.cc \ | |||||
| graph/passes/variable_prepare_op_pass.cc \ | |||||
| graph/passes/variable_ref_delete_op_pass.cc \ | |||||
| graph/passes/variable_ref_useless_control_out_delete_pass.cc \ | |||||
| graph/passes/variable_op_pass.cc \ | |||||
| graph/passes/cast_remove_pass.cc \ | |||||
| graph/passes/replace_transshape_pass.cc \ | |||||
| graph/passes/transpose_transdata_pass.cc \ | |||||
| graph/passes/identify_reference_pass.cc \ | |||||
| graph/passes/variable_format_pass.cc \ | |||||
| graph/passes/subgraph_pass.cc \ | |||||
| graph/passes/data_pass.cc \ | |||||
| graph/passes/net_output_pass.cc \ | |||||
| graph/passes/constant_fuse_same_pass.cc \ | |||||
| graph/passes/print_op_pass.cc \ | |||||
| graph/passes/no_use_reshape_remove_pass.cc \ | |||||
| graph/passes/iterator_op_pass.cc \ | |||||
| graph/passes/atomic_addr_clean_pass.cc \ | |||||
| graph/optimize/optimizer/allreduce_fusion_pass.cc \ | |||||
| graph/common/omg_util.cc \ | |||||
| graph/common/bcast.cc \ | |||||
| graph/passes/dimension_compute_pass.cc \ | |||||
| graph/passes/dimension_adjust_pass.cc \ | |||||
| graph/passes/get_original_format_pass.cc \ | |||||
| graph/passes/shape_operate_op_remove_pass.cc \ | |||||
| graph/passes/unused_op_remove_pass.cc \ | |||||
| graph/passes/assert_pass.cc \ | |||||
| graph/passes/dropout_pass.cc \ | |||||
| graph/passes/infershape_pass.cc \ | |||||
| graph/passes/unused_const_pass.cc \ | |||||
| graph/passes/isolated_op_remove_pass.cc \ | |||||
| graph/passes/permute_pass.cc \ | |||||
| graph/passes/ctrl_edge_transfer_pass.cc \ | |||||
| host_kernels/broadcast_gradient_args_kernel.cc \ | |||||
| host_kernels/greater_kernel.cc \ | |||||
| host_kernels/gather_v2_kernel.cc \ | |||||
| host_kernels/maximum_kernel.cc \ | |||||
| host_kernels/floormod_kernel.cc \ | |||||
| host_kernels/floordiv_kernel.cc \ | |||||
| host_kernels/range_kernel.cc \ | |||||
| host_kernels/shape_kernel.cc \ | |||||
| host_kernels/size_kernel.cc \ | |||||
| host_kernels/shape_n_kernel.cc \ | |||||
| host_kernels/rank_kernel.cc \ | |||||
| host_kernels/broadcast_args_kernel.cc \ | |||||
| host_kernels/fill_kernel.cc \ | |||||
| host_kernels/empty_kernel.cc \ | |||||
| host_kernels/expanddims_kernel.cc \ | |||||
| host_kernels/reshape_kernel.cc \ | |||||
| host_kernels/squeeze_kernel.cc \ | |||||
| host_kernels/kernel_utils.cc \ | |||||
| host_kernels/cast_kernel.cc \ | |||||
| host_kernels/transdata_kernel.cc \ | |||||
| host_kernels/transpose_kernel.cc \ | |||||
| host_kernels/permute_kernel.cc \ | |||||
| host_kernels/pack_kernel.cc \ | |||||
| host_kernels/concat_v2_kernel.cc \ | |||||
| host_kernels/concat_offset_kernel.cc \ | |||||
| host_kernels/strided_slice_kernel.cc \ | |||||
| host_kernels/ssd_prior_box_kernel.cc \ | |||||
| host_kernels/add_kernel.cc \ | |||||
| host_kernels/unpack_kernel.cc \ | |||||
| host_kernels/sub_kernel.cc \ | |||||
| host_kernels/mul_kernel.cc \ | |||||
| host_kernels/reduce_prod_kernel.cc \ | |||||
| host_kernels/rsqrt_kernel.cc \ | |||||
| host_kernels/slice_kernel.cc \ | |||||
| host_kernels/slice_d_kernel.cc \ | |||||
| host_kernels/dynamic_stitch_kernel.cc \ | |||||
| graph/passes/stop_gradient_pass.cc \ | |||||
| graph/passes/prevent_gradient_pass.cc \ | |||||
| graph/passes/identity_pass.cc \ | |||||
| graph/passes/placeholder_with_default_pass.cc \ | |||||
| graph/passes/snapshot_pass.cc \ | |||||
| graph/passes/guarantee_const_pass.cc \ | |||||
| graph/passes/var_is_initialized_op_pass.cc \ | |||||
| graph/passes/parallel_concat_start_op_pass.cc \ | |||||
| graph/passes/cast_translate_pass.cc \ | |||||
| graph/passes/addn_pass.cc \ | |||||
| graph/passes/common_subexpression_elimination_pass.cc \ | |||||
| graph/passes/transop_symmetry_elimination_pass.cc \ | |||||
| graph/passes/save_pass.cc \ | |||||
| graph/passes/switch_dead_branch_elimination.cc \ | |||||
| graph/passes/merge_pass.cc \ | |||||
| graph/passes/prune_pass.cc \ | |||||
| graph/passes/flow_ctrl_pass.cc \ | |||||
| graph/passes/control_trigger_pass.cc \ | |||||
| graph/passes/switch_data_edges_bypass.cc \ | |||||
| graph/passes/switch_op_pass.cc \ | |||||
| graph/passes/multi_batch_pass.cc \ | |||||
| graph/passes/switch_logic_remove_pass.cc \ | |||||
| graph/passes/next_iteration_pass.cc \ | |||||
| graph/passes/cond_pass.cc \ | |||||
| graph/passes/cond_remove_pass.cc \ | |||||
| graph/passes/for_pass.cc \ | |||||
| graph/passes/enter_pass.cc \ | |||||
| graph/passes/hccl_memcpy_pass.cc \ | |||||
| graph/passes/link_gen_mask_nodes_pass.cc \ | |||||
| graph/passes/replace_with_empty_const_pass.cc \ | |||||
| graph/passes/hccl_group_pass.cc \ | |||||
| OME_SRC_FILES := \ | |||||
| graph/manager/graph_mem_allocator.cc \ | |||||
| graph/manager/graph_caching_allocator.cc \ | |||||
| graph/manager/model_manager/event_manager.cc \ | |||||
| graph/manager/util/debug.cc \ | |||||
| graph/manager/util/rt_context_util.cc \ | |||||
| graph/manager/util/variable_accelerate_ctrl.cc \ | |||||
| graph/manager/util/hcom_util.cc \ | |||||
| graph/load/new_model_manager/model_manager.cc \ | |||||
| graph/load/new_model_manager/data_inputer.cc \ | |||||
| graph/load/new_model_manager/davinci_model.cc \ | |||||
| graph/load/new_model_manager/davinci_model_parser.cc \ | |||||
| graph/load/new_model_manager/model_utils.cc \ | |||||
| graph/load/new_model_manager/tbe_handle_store.cc \ | |||||
| graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||||
| graph/load/new_model_manager/zero_copy_task.cc \ | |||||
| graph/load/output/output.cc \ | |||||
| graph/load/new_model_manager/data_dumper.cc \ | |||||
| graph/load/new_model_manager/task_info/task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/hccl_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||||
| graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||||
| single_op/task/op_task.cc \ | |||||
| single_op/task/build_task_utils.cc \ | |||||
| single_op/task/tbe_task_builder.cc \ | |||||
| single_op/task/aicpu_task_builder.cc \ | |||||
| single_op/single_op.cc \ | |||||
| single_op/single_op_model.cc \ | |||||
| single_op/stream_resource.cc \ | |||||
| single_op/single_op_manager.cc \ | |||||
| hybrid/hybrid_davinci_model_stub.cc \ | |||||
| COMMON_LOCAL_C_INCLUDES := \ | |||||
| proto/om.proto \ | |||||
| proto/task.proto \ | |||||
| proto/insert_op.proto \ | |||||
| proto/ge_ir.proto \ | |||||
| proto/fwk_adapter.proto \ | |||||
| proto/op_mapping_info.proto \ | |||||
| proto/tensorflow/attr_value.proto \ | |||||
| proto/tensorflow/function.proto \ | |||||
| proto/tensorflow/graph.proto \ | |||||
| proto/tensorflow/node_def.proto \ | |||||
| proto/tensorflow/op_def.proto \ | |||||
| proto/tensorflow/resource_handle.proto \ | |||||
| proto/tensorflow/tensor.proto \ | |||||
| proto/tensorflow/tensor_shape.proto \ | |||||
| proto/tensorflow/types.proto \ | |||||
| proto/tensorflow/versions.proto \ | |||||
| $(LOCAL_PATH) ./ \ | |||||
| $(TOPDIR)inc \ | |||||
| $(TOPDIR)inc/external \ | |||||
| $(TOPDIR)inc/external/graph \ | |||||
| $(TOPDIR)inc/framework \ | |||||
| $(TOPDIR)inc/framework/common \ | |||||
| $(TOPDIR)inc/runtime \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| $(TOPDIR)ops/built-in/op_proto/inc \ | |||||
| third_party/json/include \ | |||||
| third_party/protobuf/include \ | |||||
| third_party/opencv/include \ | |||||
| NEW_OMG_HOST_SRC_FILES := \ | |||||
| graph/preprocess/insert_op/util_insert_aipp_op.cc \ | |||||
| graph/preprocess/insert_op/ge_aipp_op.cc \ | |||||
| graph/build/model_builder.cc \ | |||||
| graph/build/task_generator.cc \ | |||||
| graph/build/stream_allocator.cc \ | |||||
| graph/build/logical_stream_allocator.cc \ | |||||
| graph/build/stream_graph_optimizer.cc \ | |||||
| graph/build/run_context.cc \ | |||||
| graph/build/label_allocator.cc \ | |||||
| graph/label/label_maker.cc \ | |||||
| graph/label/if_label_maker.cc \ | |||||
| graph/label/case_label_maker.cc \ | |||||
| graph/label/while_label_maker.cc \ | |||||
| graph/label/partitioned_call_label_maker.cc \ | |||||
| #compiler for host train | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_train | |||||
| LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||||
| LOCAL_CFLAGS += -DDAVINCI_CLOUD -DDAVINCI_TRAIN -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING | |||||
| LOCAL_CFLAGS += -DFMK_SUPPORT_DEBUG | |||||
| ifeq ($(DEBUG), 1) | |||||
| LOCAL_CFLAGS += -g -O0 | |||||
| endif | |||||
| LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||||
| LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(OME_SRC_FILES) | |||||
| LOCAL_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES) | |||||
| LOCAL_STATIC_LIBRARIES := libge_memory \ | |||||
| LOCAL_SHARED_LIBRARIES := \ | |||||
| libc_sec \ | |||||
| libprotobuf \ | |||||
| libslog \ | |||||
| libmmpa \ | |||||
| libgraph \ | |||||
| libregister \ | |||||
| libge_common \ | |||||
| libhccl \ | |||||
| libmsprof \ | |||||
| LOCAL_LDFLAGS := -lrt -ldl | |||||
| LOCAL_SHARED_LIBRARIES += \ | |||||
| libruntime \ | |||||
| libresource \ | |||||
| include $(BUILD_HOST_SHARED_LIBRARY) | |||||
| # add engine_conf.json to host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := engine_conf.json | |||||
| LOCAL_SRC_FILES := engine_manager/engine_conf.json | |||||
| LOCAL_MODULE_CLASS := ETC | |||||
| LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json | |||||
| include $(BUILD_HOST_PREBUILT) | |||||
| # add optimizer_priority.pbtxt to host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := optimizer_priority.pbtxt | |||||
| LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt | |||||
| LOCAL_MODULE_CLASS := ETC | |||||
| LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt | |||||
| include $(BUILD_HOST_PREBUILT) | |||||
| @@ -22,10 +22,13 @@ | |||||
| #include "common/util.h" | #include "common/util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "ge/ge_api.h" | #include "ge/ge_api.h" | ||||
| #include "graph/ge_context.h" | |||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| #include "graph/manager/graph_manager.h" | #include "graph/manager/graph_manager.h" | ||||
| #include "graph/manager/util/rt_context_util.h" | |||||
| #include "graph/opsproto_manager.h" | #include "graph/opsproto_manager.h" | ||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "graph/utils/type_utils.h" | |||||
| #include "model/ge_model.h" | #include "model/ge_model.h" | ||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| @@ -108,7 +111,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, | |||||
| static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index, | |||||
| bool attr) { | bool attr) { | ||||
| GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | ||||
| GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | ||||
| @@ -122,6 +125,17 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const | |||||
| if (data_op == nullptr) { | if (data_op == nullptr) { | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | |||||
| auto input_desc = op_desc->MutableInputDesc(index); | |||||
| GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID); | |||||
| ge::Format old_format = input_desc->GetFormat(); | |||||
| if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { | |||||
| input_desc->SetFormat(FORMAT_ND); | |||||
| input_desc->SetOriginFormat(FORMAT_ND); | |||||
| (void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format)); | |||||
| (void)AttrUtils::SetBool(data_op, "_is_single_op", true); | |||||
| } | |||||
| GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); | GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); | ||||
| GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); | GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); | ||||
| @@ -139,10 +153,21 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const | |||||
| } | } | ||||
| static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, const vector<GeTensor> &outputs) { | static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, const vector<GeTensor> &outputs) { | ||||
| OpDescPtr op_desc = MakeShared<ge::OpDesc>(NODE_NAME_NET_OUTPUT, NETOUTPUT); | |||||
| OpDescPtr op_desc = MakeShared<ge::OpDesc>(graph->GetName() + "_" + NODE_NAME_NET_OUTPUT, NETOUTPUT); | |||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| auto single_op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID); | |||||
| auto output_desc = single_op_desc->MutableOutputDesc(0); | |||||
| GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID); | |||||
| ge::Format old_format = output_desc->GetFormat(); | |||||
| if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { | |||||
| output_desc->SetFormat(FORMAT_ND); | |||||
| output_desc->SetOriginFormat(FORMAT_ND); | |||||
| (void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format)); | |||||
| (void)AttrUtils::SetBool(op_desc, "_is_single_op", true); | |||||
| } | |||||
| int32_t count = 0; | int32_t count = 0; | ||||
| for (const auto &out_desc : outputs) { | for (const auto &out_desc : outputs) { | ||||
| GeTensorDesc tensor = out_desc.GetTensorDesc(); | GeTensorDesc tensor = out_desc.GetTensorDesc(); | ||||
| @@ -187,6 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) { | |||||
| opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | ||||
| } | } | ||||
| static string GetModelNameFromFileName(const string &file_name_prefix) { | |||||
| int start_position = 0; | |||||
| // using output as model_name (ignore ".om") | |||||
| int filename_suffixes = 3; | |||||
| if (file_name_prefix.find_last_of('/') != string::npos) { | |||||
| start_position += 1; | |||||
| } | |||||
| int end_position = file_name_prefix.length() - filename_suffixes; | |||||
| string model_name = file_name_prefix.substr(start_position, end_position - start_position); | |||||
| GELOGI("Get model_name from file, model_name:%s", model_name.c_str()); | |||||
| return model_name; | |||||
| } | |||||
| class GeGenerator::Impl { | class GeGenerator::Impl { | ||||
| public: | public: | ||||
| Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); | Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); | ||||
| @@ -278,24 +316,28 @@ Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) { | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("GenerateInfershapeGraph success."); | |||||
| GELOGI("Generate infer shape graph success"); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, | Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, | ||||
| ModelBufferData &model, bool is_offline) { | ModelBufferData &model, bool is_offline) { | ||||
| rtContext_t ctx = nullptr; | |||||
| auto rt = rtCtxGetCurrent(&ctx); | |||||
| if (rt != RT_ERROR_NONE) { | |||||
| GELOGW("Current ctx is null."); | |||||
| } else { | |||||
| ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx); | |||||
| } | |||||
| GraphId graph_id; | GraphId graph_id; | ||||
| GeRootModelPtr ge_root_model = nullptr; | GeRootModelPtr ge_root_model = nullptr; | ||||
| GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | ||||
| // using output as model_name (ignore ".om") | |||||
| int start_position = file_name_prefix.find_last_of('/') + 1; | |||||
| int end_position = file_name_prefix.length() - 3; | |||||
| const string model_name = file_name_prefix.substr(start_position, end_position - start_position); | |||||
| const string model_name = GetModelNameFromFileName(file_name_prefix); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!"); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!"); | ||||
| impl_->is_offline_ = is_offline; | impl_->is_offline_ = is_offline; | ||||
| Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); | Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Build model failed"); | |||||
| GELOGE(ret, "Build model failed."); | |||||
| if (impl_->graph_manager_.Finalize() != SUCCESS) { | if (impl_->graph_manager_.Finalize() != SUCCESS) { | ||||
| GELOGE(FAILED, "graph_manager finalize fail."); | GELOGE(FAILED, "graph_manager finalize fail."); | ||||
| } | } | ||||
| @@ -316,6 +358,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (RtContextUtil::GetInstance().GetNormalModeContext() != nullptr) { | |||||
| (void)rtCtxSetCurrent(RtContextUtil::GetInstance().GetNormalModeContext()); | |||||
| } | |||||
| GELOGI("GenerateOfflineModel success."); | GELOGI("GenerateOfflineModel success."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -325,11 +372,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| bool is_offline) { | bool is_offline) { | ||||
| GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | ||||
| if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { | if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { | ||||
| GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size:%zu", inputs.size(), op_desc->GetInputsSize()); | |||||
| GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize()); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { | if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { | ||||
| GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size:%zu", outputs.size(), op_desc->GetOutputsSize()); | |||||
| GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size: %zu", outputs.size(), op_desc->GetOutputsSize()); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| @@ -368,7 +415,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| } | } | ||||
| } else { | } else { | ||||
| for (const auto &in_desc : inputs) { | for (const auto &in_desc : inputs) { | ||||
| const GeTensorDesc input_desc = in_desc.GetTensorDesc(); | |||||
| GeTensorDesc input_desc = in_desc.GetTensorDesc(); | |||||
| GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); | GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); | ||||
| arg_index++; | arg_index++; | ||||
| } | } | ||||
| @@ -382,7 +429,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| // dump ComputeGraph. | // dump ComputeGraph. | ||||
| compute_graph->Dump(); | compute_graph->Dump(); | ||||
| Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); | Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); | ||||
| GELOGI("ATC parser success in single op schedule."); | |||||
| GELOGI("ATC parser success in single op build."); | |||||
| GraphId graph_id; | GraphId graph_id; | ||||
| GeRootModelPtr ge_root_model = nullptr; | GeRootModelPtr ge_root_model = nullptr; | ||||
| @@ -394,7 +441,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | ||||
| map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | ||||
| GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | ||||
| GELOGD("The opType in op_desc_tmp is: %s", op_desc_tmp->GetType().c_str()); | |||||
| GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | ||||
| GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -411,7 +458,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
| */ | */ | ||||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| const vector<GeTensor> &outputs, const string &model_file_name) { | const vector<GeTensor> &outputs, const string &model_file_name) { | ||||
| GELOGI("Start to Build Single Op Offline Model."); | |||||
| GELOGI("Start to build single op offline model."); | |||||
| ModelBufferData model_buff; | ModelBufferData model_buff; | ||||
| OpEngineType engine_type = ENGINE_SYS; | OpEngineType engine_type = ENGINE_SYS; | ||||
| return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | ||||
| @@ -430,7 +477,7 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||||
| Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
| const vector<GeTensor> &outputs, OpEngineType engine_type, | const vector<GeTensor> &outputs, OpEngineType engine_type, | ||||
| ModelBufferData &model_buff) { | ModelBufferData &model_buff) { | ||||
| GELOGI("Start to Build Single Op Online"); | |||||
| GELOGI("Start to build single op online"); | |||||
| return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); | ||||
| } | } | ||||
| @@ -449,7 +496,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & | |||||
| model_helper.SetSaveMode(is_offline_); | model_helper.SetSaveMode(is_offline_); | ||||
| Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff); | Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Save to Om model failed"); | |||||
| GELOGE(ret, "Save to om model failed"); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -461,16 +508,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||||
| const std::map<std::string, std::string> options; | const std::map<std::string, std::string> options; | ||||
| Status ret = graph_manager_.AddGraph(id, graph, options); | Status ret = graph_manager_.AddGraph(id, graph, options); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, id: %u", id); | |||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph fail, graph id: %u", id); | |||||
| (void)graph_manager_.Finalize(); | (void)graph_manager_.Finalize(); | ||||
| return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; | ||||
| } | } | ||||
| GELOGI("models inputs.size()=%zu", inputs.size()); | |||||
| GELOGI("Model inputs size is %zu", inputs.size()); | |||||
| graph_manager_.SetOptionsRunGraphFlag(false); | graph_manager_.SetOptionsRunGraphFlag(false); | ||||
| ret = graph_manager_.BuildGraph(id, inputs, ge_root_model); | |||||
| struct timeval tv; | |||||
| if (gettimeofday(&tv, nullptr) != 0) { | |||||
| GELOGE(INTERNAL_ERROR, "get the time of day failed."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us | |||||
| ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph failed, id: %u", id); | |||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); | |||||
| return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | ||||
| } | } | ||||
| @@ -485,14 +538,14 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g | |||||
| const std::map<std::string, std::string> options; | const std::map<std::string, std::string> options; | ||||
| Status ret = graph_manager_.AddGraph(id, graph, options); | Status ret = graph_manager_.AddGraph(id, graph, options); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "graphManager add graph failed, id: %u", id); | |||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, graph id: %u", id); | |||||
| (void)graph_manager_.Finalize(); | (void)graph_manager_.Finalize(); | ||||
| return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; | ||||
| } | } | ||||
| ret = graph_manager_.GenerateInfershapeGraph(id); | ret = graph_manager_.GenerateInfershapeGraph(id); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager BuildGraph failed, id: %u", id); | |||||
| GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed"); | |||||
| return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | ||||
| } | } | ||||
| @@ -160,10 +160,10 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block) { | |||||
| parent->child_offset_ += child->block_size_; | parent->child_offset_ += child->block_size_; | ||||
| child->deleted_block_ = true; | child->deleted_block_ = true; | ||||
| GELOGI( | GELOGI( | ||||
| "Add block stream id:%ld [size:%zu, life time[begin:%zu, end:%zu]] to" | |||||
| " block[size:%zu, life time[begin:%zu, end:%zu]]", | |||||
| stream_id_, child->block_size_, child->GetLifeBegin(), child->GetLifeEnd(), parent->block_size_, | |||||
| parent->GetLifeBegin(), parent->GetLifeEnd()); | |||||
| "Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" | |||||
| " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", | |||||
| child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, | |||||
| parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); | |||||
| } | } | ||||
| } | } | ||||
| @@ -499,17 +499,17 @@ void BlockMemAssigner::InitReuseFlag() { | |||||
| bool pre_reuse_flag = true; | bool pre_reuse_flag = true; | ||||
| bool post_reuse_flag = true; | bool post_reuse_flag = true; | ||||
| for (auto &node_index_io : pair.second) { | for (auto &node_index_io : pair.second) { | ||||
| if (node_index_io.io_type == kIn) { | |||||
| if (node_index_io.io_type_ == kIn) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| OutDataAnchorPtr out_anchor = node_index_io.node->GetOutDataAnchor(node_index_io.index); | |||||
| OutDataAnchorPtr out_anchor = node_index_io.node_->GetOutDataAnchor(node_index_io.index_); | |||||
| if (out_anchor == nullptr) { | if (out_anchor == nullptr) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| bool out_flg = false; | bool out_flg = false; | ||||
| if (node_index_io.node->GetOutDataNodes().empty()) { | |||||
| if (node_index_io.node_->GetOutDataNodes().empty()) { | |||||
| out_flg = true; | out_flg = true; | ||||
| } | } | ||||
| for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | ||||
| @@ -643,7 +643,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| CanReuseByStream(map_iter->second, *reusable_block)) { | CanReuseByStream(map_iter->second, *reusable_block)) { | ||||
| GELOGD("Cross stream mem reuse, target stream:%ld, current stream:%ld", reusable_block->stream_id_, | GELOGD("Cross stream mem reuse, target stream:%ld, current stream:%ld", reusable_block->stream_id_, | ||||
| stream_id); | stream_id); | ||||
| reusable_block->AddNodeTypeIndex({n, mem_type, out_index}, real_size, no_align_size); | |||||
| reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | |||||
| if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
| auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | ||||
| if (iter != anchor_to_symbol_.end()) { | if (iter != anchor_to_symbol_.end()) { | ||||
| @@ -660,7 +660,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| } | } | ||||
| } | } | ||||
| auto block = new (std::nothrow) MemoryBlock(block_size, is_reuse_memory); | |||||
| auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | ||||
| // Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
| @@ -688,7 +688,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | ||||
| MemoryBlock *block = nullptr; | MemoryBlock *block = nullptr; | ||||
| NodeIndexIO node_index_io = NodeIndexIO(n, index, kOut); | |||||
| NodeIndexIO node_index_io(n, index, kOut); | |||||
| int64_t size = 0; | int64_t size = 0; | ||||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
| if (output_op_desc != nullptr) { | if (output_op_desc != nullptr) { | ||||
| @@ -701,7 +701,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| if (IsSymbolExist(node_index_io)) { | if (IsSymbolExist(node_index_io)) { | ||||
| std::string symbol = anchor_to_symbol_[node_index_io.ToString()]; | std::string symbol = anchor_to_symbol_[node_index_io.ToString()]; | ||||
| block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
| block->AddNodeTypeIndex({n, kOutput, index}, size, no_align_size); | |||||
| block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | |||||
| block->ref_count_++; | block->ref_count_++; | ||||
| } else { | } else { | ||||
| int64_t max_size = size; | int64_t max_size = size; | ||||
| @@ -749,7 +749,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | ||||
| GELOGI("Get dst_reuse_input_index failed")); | GELOGI("Get dst_reuse_input_index failed")); | ||||
| if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | ||||
| block->AddNodeTypeIndex({owner_node, kOutput, i}, block->Size(), block->Size()); | |||||
| block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); | |||||
| out_count_reuse_input += 1; | out_count_reuse_input += 1; | ||||
| reuse_input = true; | reuse_input = true; | ||||
| } | } | ||||
| @@ -775,31 +775,6 @@ bool IsOutputBlock(const ge::InDataAnchorPtr &in_data_anchor) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| // current node's output uses previous node's output memory | |||||
| bool IsReferencePreviousNodeOutputMemory(const ge::NodePtr &node, uint32_t output_index) { | |||||
| // Get the reference type of the node, default is false | |||||
| bool is_ref = false; | |||||
| // If GetBool fail, is_ref is false. | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| if (op_desc == nullptr) { | |||||
| return false; | |||||
| } | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); | |||||
| if (!is_ref) { | |||||
| return false; | |||||
| } | |||||
| const string &output_name = op_desc->GetOutputNameByIndex(output_index); | |||||
| for (const auto &input_name : op_desc->GetAllInputNames()) { | |||||
| if (!input_name.empty() && output_name == input_name) { | |||||
| int input_index = op_desc->GetInputIndexByName(input_name); | |||||
| GELOGI("Reference memory:name[%s] output[%s][%u] ref to input[%s][%d] ", op_desc->GetName().c_str(), | |||||
| output_name.c_str(), output_index, input_name.c_str(), input_index); | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| // atomic out memory will be reassigned | // atomic out memory will be reassigned | ||||
| bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool is_atomic, | bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool is_atomic, | ||||
| bool out_node_set_continuous_input) { | bool out_node_set_continuous_input) { | ||||
| @@ -920,58 +895,57 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_ | |||||
| } | } | ||||
| Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) { | Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) { | ||||
| auto node_op_desc = node->GetOpDesc(); | |||||
| int64_t stream_id = node_op_desc->GetStreamId(); | |||||
| auto op_desc = node->GetOpDesc(); | |||||
| int64_t stream_id = op_desc->GetStreamId(); | |||||
| vector<int64_t> memorys_type; | vector<int64_t> memorys_type; | ||||
| bool has_mem_type_attr = ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type); | |||||
| GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", node_op_desc->GetName().c_str(), | |||||
| node_op_desc->GetOutputsSize(), memorys_type.size()); | |||||
| if (has_mem_type_attr && (memorys_type.size() != node_op_desc->GetOutputsSize())) { | |||||
| bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type); | |||||
| GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", op_desc->GetName().c_str(), | |||||
| op_desc->GetOutputsSize(), memorys_type.size()); | |||||
| if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | |||||
| GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", | GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", | ||||
| node_op_desc->GetName().c_str(), node_op_desc->GetOutputsSize(), memorys_type.size()); | |||||
| op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size()); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| is_op_reuse_mem_ = true; | is_op_reuse_mem_ = true; | ||||
| if (op_reuse_env_valid_ == true) { | if (op_reuse_env_valid_ == true) { | ||||
| vector<string>::iterator it_name = | vector<string>::iterator it_name = | ||||
| std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetName()); | |||||
| std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); | |||||
| vector<string>::iterator it_type = | vector<string>::iterator it_type = | ||||
| std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetType()); | |||||
| std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetType()); | |||||
| GE_IF_BOOL_EXEC(it_name != op_no_reuse_mem_vec_.end() || it_type != op_no_reuse_mem_vec_.end(), | GE_IF_BOOL_EXEC(it_name != op_no_reuse_mem_vec_.end() || it_type != op_no_reuse_mem_vec_.end(), | ||||
| is_op_reuse_mem_ = false;); | is_op_reuse_mem_ = false;); | ||||
| } | } | ||||
| bool is_atomic = false; | bool is_atomic = false; | ||||
| // If GetBool fail, is_atomic is false. | // If GetBool fail, is_atomic is false. | ||||
| (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | |||||
| (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | |||||
| // Allocate memory for the current node and release node memory of the same size in the workspace | // Allocate memory for the current node and release node memory of the same size in the workspace | ||||
| GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | ||||
| ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_);) | ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_);) | ||||
| for (uint32_t i = 0; i < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); i++) { | |||||
| for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | |||||
| int64_t size = 0; | int64_t size = 0; | ||||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(i); | |||||
| auto output_op_desc = op_desc->GetOutputDescPtr(i); | |||||
| if (output_op_desc != nullptr) { | if (output_op_desc != nullptr) { | ||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | ||||
| } | } | ||||
| // fusion: other type's size not means malloc HBM memory | // fusion: other type's size not means malloc HBM memory | ||||
| bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | ||||
| if (l1_flag) { | if (l1_flag) { | ||||
| GELOGI("fusion: node[%s], output[%s], output memory type [%d]", node_op_desc->GetName().c_str(), | |||||
| node_op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | |||||
| GELOGI("fusion: node[%s], output[%s], output memory type [%d]", op_desc->GetName().c_str(), | |||||
| op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | |||||
| size = 0; | size = 0; | ||||
| } | } | ||||
| std::string peer_name; | std::string peer_name; | ||||
| uint32_t peer_input_index = 0; | uint32_t peer_input_index = 0; | ||||
| bool out_node_set_continuous_input = false; | bool out_node_set_continuous_input = false; | ||||
| bool no_need_assign_memory = | |||||
| ((size == 0) || CheckIsZeroMemNodeType(node->GetType()) || IsReferencePreviousNodeOutputMemory(node, i)); | |||||
| bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); | |||||
| if (!no_need_assign_memory) { | if (!no_need_assign_memory) { | ||||
| out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); | out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); | ||||
| no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); | no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); | ||||
| } | } | ||||
| if (no_need_assign_memory) { | if (no_need_assign_memory) { | ||||
| zero_memory_list_.emplace_back(node, kOutput, i); | |||||
| zero_memory_list_.emplace_back(node, kOutput, i, false); | |||||
| continue; | continue; | ||||
| } | } | ||||
| // atomic can't be reused | // atomic can't be reused | ||||
| @@ -1049,7 +1023,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| workspace_skip_flag = true; | workspace_skip_flag = true; | ||||
| } | } | ||||
| if (temp[i] == 0 || workspace_skip_flag) { | if (temp[i] == 0 || workspace_skip_flag) { | ||||
| zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i)); | |||||
| zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i), false); | |||||
| continue; | continue; | ||||
| } | } | ||||
| MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast<size_t>(temp[i]), ranges), | MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast<size_t>(temp[i]), ranges), | ||||
| @@ -1067,7 +1041,9 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| (void)mem_block; // Fix warning | (void)mem_block; // Fix warning | ||||
| } | } | ||||
| GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), MergeDynamicBatchBlocks();) | |||||
| bool merge_dynamic_batch = false; | |||||
| GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();) | |||||
| GE_IF_BOOL_EXEC(!merge_dynamic_batch, ReuseBlocksByLifeTime();) | |||||
| AssignContinuousBlocks(); | AssignContinuousBlocks(); | ||||
| ResizeMemoryBlocks(); | ResizeMemoryBlocks(); | ||||
| @@ -1131,7 +1107,8 @@ void MergeBlocks(std::vector<MemoryBlock *> &dest, std::vector<MemoryBlock *> &s | |||||
| } | } | ||||
| } | } | ||||
| void BlockMemAssigner::MergeDynamicBatchBlocks() { | |||||
| bool BlockMemAssigner::MergeDynamicBatchBlocks() { | |||||
| bool merged = false; | |||||
| std::map<std::string, std::vector<MemoryBlock *>> dynamic_batch_blocks; | std::map<std::string, std::vector<MemoryBlock *>> dynamic_batch_blocks; | ||||
| for (auto block : memory_blocks_) { | for (auto block : memory_blocks_) { | ||||
| if (block == nullptr) { | if (block == nullptr) { | ||||
| @@ -1160,8 +1137,10 @@ void BlockMemAssigner::MergeDynamicBatchBlocks() { | |||||
| if (it != it_max) { | if (it != it_max) { | ||||
| GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str()); | GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str()); | ||||
| MergeBlocks(it_max->second, it->second); | MergeBlocks(it_max->second, it->second); | ||||
| merged = true; | |||||
| } | } | ||||
| } | } | ||||
| return merged; | |||||
| } | } | ||||
| // asending order | // asending order | ||||
| @@ -1331,9 +1310,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz | |||||
| } | } | ||||
| GELOGI( | GELOGI( | ||||
| "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" | "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" | ||||
| " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d].", | |||||
| " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d] isref[%d].", | |||||
| graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, | graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, | ||||
| op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block); | |||||
| op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, | |||||
| node_type.ref_input); | |||||
| } | } | ||||
| void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { | void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { | ||||
| @@ -1528,6 +1508,7 @@ void BlockMemAssigner::FindDependentStreamBetweenGraphs(const NodePtr &pre_node, | |||||
| bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | ||||
| return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | ||||
| (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | ||||
| (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT); | |||||
| (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||||
| (node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE); | |||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -23,6 +23,7 @@ | |||||
| #include <unordered_set> | #include <unordered_set> | ||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #include <list> | |||||
| #include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
| #include "common/types.h" | #include "common/types.h" | ||||
| #include "common/util.h" | #include "common/util.h" | ||||
| @@ -36,13 +37,14 @@ const size_t kMaxLifeTime = 0xffffffff; | |||||
| enum MemoryType { kOutput, kWorkspace }; | enum MemoryType { kOutput, kWorkspace }; | ||||
| struct NodeTypeIndex { | struct NodeTypeIndex { | ||||
| NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index) | |||||
| : node(std::move(node)), mem_type(mem_type), index(index) {} | |||||
| NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index, bool ref_input = false) | |||||
| : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} | |||||
| ge::NodePtr node = nullptr; | ge::NodePtr node = nullptr; | ||||
| MemoryType mem_type = kOutput; | MemoryType mem_type = kOutput; | ||||
| uint32_t index = 0; | uint32_t index = 0; | ||||
| size_t life_time_end = kMaxLifeTime; | size_t life_time_end = kMaxLifeTime; | ||||
| bool ref_input = false; | |||||
| const string GetMemType() const { | const string GetMemType() const { | ||||
| if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
| return "output"; | return "output"; | ||||
| @@ -55,9 +57,9 @@ struct NodeTypeIndex { | |||||
| class MemoryBlock { | class MemoryBlock { | ||||
| public: | public: | ||||
| explicit MemoryBlock(size_t block_size, bool reuse_mem = true) | |||||
| explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true) | |||||
| : ref_count_(0), | : ref_count_(0), | ||||
| stream_id_(0), | |||||
| stream_id_(stream_id), | |||||
| deleted_block_(false), | deleted_block_(false), | ||||
| reuse_mem_(reuse_mem), | reuse_mem_(reuse_mem), | ||||
| input_index_(0), | input_index_(0), | ||||
| @@ -81,7 +83,7 @@ class MemoryBlock { | |||||
| void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { | void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { | ||||
| real_size_list_.emplace_back(real_size); | real_size_list_.emplace_back(real_size); | ||||
| no_align_size_list_.emplace_back(no_align_size); | no_align_size_list_.emplace_back(no_align_size); | ||||
| node_type_index_list_.emplace_back(node, type, out_index); | |||||
| node_type_index_list_.emplace_back(node, type, out_index, false); | |||||
| } | } | ||||
| size_t Size() const { return block_size_; } | size_t Size() const { return block_size_; } | ||||
| @@ -129,6 +131,7 @@ class MemoryBlock { | |||||
| bool continuous_block_; | bool continuous_block_; | ||||
| bool last_continuous_block_; | bool last_continuous_block_; | ||||
| bool is_zero_copy_; | bool is_zero_copy_; | ||||
| std::map<int64_t, size_t> depend_stream_life_; | |||||
| private: | private: | ||||
| size_t block_size_; | size_t block_size_; | ||||
| @@ -287,7 +290,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
| std::vector<NodeTypeIndex> zero_memory_list_; | std::vector<NodeTypeIndex> zero_memory_list_; | ||||
| // ref mapping | // ref mapping | ||||
| std::map<std::string, std::vector<NodeIndexIO>> symbol_to_anchors_; | |||||
| std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors_; | |||||
| std::map<std::string, std::string> anchor_to_symbol_; | std::map<std::string, std::string> anchor_to_symbol_; | ||||
| std::map<std::string, bool> pre_reuse_flag_; | std::map<std::string, bool> pre_reuse_flag_; | ||||
| std::map<std::string, bool> post_reuse_flag_; | std::map<std::string, bool> post_reuse_flag_; | ||||
| @@ -371,10 +374,10 @@ class BlockMemAssigner : public MemAssigner { | |||||
| /// | /// | ||||
| /// @ingroup GE | /// @ingroup GE | ||||
| /// @brief Merge memory blocks between different batchs | /// @brief Merge memory blocks between different batchs | ||||
| /// @return void | |||||
| /// @return merge or not | |||||
| /// @author | /// @author | ||||
| /// | /// | ||||
| void MergeDynamicBatchBlocks(); | |||||
| bool MergeDynamicBatchBlocks(); | |||||
| void AssignContinuousBlocks(); | void AssignContinuousBlocks(); | ||||
| @@ -0,0 +1,98 @@ | |||||
| LOCAL_PATH := $(call my-dir) | |||||
| local_lib_src_files := memory_assigner.cc \ | |||||
| graph_mem_assigner.cc \ | |||||
| binary_block_mem_assigner.cc \ | |||||
| block_mem_assigner.cc \ | |||||
| hybrid_mem_assigner.cc \ | |||||
| max_block_mem_assigner.cc \ | |||||
| var_mem_assign_util.cc \ | |||||
| local_lib_inc_path := ${LOCAL_PATH} \ | |||||
| ${TOPDIR}inc \ | |||||
| ${TOPDIR}inc/external \ | |||||
| ${TOPDIR}inc/external/graph \ | |||||
| $(TOPDIR)libc_sec/include \ | |||||
| ${TOPDIR}third_party/protobuf/include \ | |||||
| ${TOPDIR}inc/framework \ | |||||
| $(TOPDIR)framework/domi \ | |||||
| #compiler for host | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_memory | |||||
| LOCAL_CFLAGS += -std=c++11 | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -O2 | |||||
| ifeq ($(DEBUG), 1) | |||||
| LOCAL_CFLAGS += -g -O0 | |||||
| endif | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libprotobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libge_common \ | |||||
| LOCAL_SRC_FILES := $(local_lib_src_files) | |||||
| generated_sources_dir := $(call local-generated-sources-dir) | |||||
| LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH) | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| LOCAL_C_INCLUDES += LOCAL_EXPORT_C_INCLUDE_DIRS | |||||
| include ${BUILD_HOST_STATIC_LIBRARY} | |||||
| #compiler for device | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_memory | |||||
| LOCAL_CFLAGS += -std=c++11 | |||||
| LOCAL_CFLAGS += -Werror | |||||
| LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY | |||||
| LOCAL_CFLAGS += -O2 | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libprotobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libge_common \ | |||||
| LOCAL_SRC_FILES := $(local_lib_src_files) | |||||
| generated_sources_dir := $(call local-generated-sources-dir) | |||||
| LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH) | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| LOCAL_C_INCLUDES += LOCAL_EXPORT_C_INCLUDE_DIRS | |||||
| include ${BUILD_STATIC_LIBRARY} | |||||
| #compiler for device | |||||
| include $(CLEAR_VARS) | |||||
| LOCAL_MODULE := libge_memory | |||||
| LOCAL_CFLAGS += -std=c++11 | |||||
| LOCAL_LDFLAGS := | |||||
| LOCAL_STATIC_LIBRARIES := | |||||
| LOCAL_SHARED_LIBRARIES := libprotobuf \ | |||||
| libc_sec \ | |||||
| libslog \ | |||||
| libgraph \ | |||||
| libge_common \ | |||||
| LOCAL_SRC_FILES := $(local_lib_src_files) | |||||
| generated_sources_dir := $(call local-generated-sources-dir) | |||||
| LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH) | |||||
| LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||||
| LOCAL_C_INCLUDES += LOCAL_EXPORT_C_INCLUDE_DIRS | |||||
| include ${BUILD_LLT_STATIC_LIBRARY} | |||||
| @@ -18,6 +18,7 @@ | |||||
| #include <iostream> | #include <iostream> | ||||
| #include <set> | #include <set> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <securectype.h> | |||||
| #include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "graph/anchor.h" | #include "graph/anchor.h" | ||||
| @@ -250,7 +251,7 @@ Status ModelBuilder::SetInputOutputDesc() { | |||||
| } | } | ||||
| // if user set input node format ND, the expected node for data and netoutput format is ND in | // if user set input node format ND, the expected node for data and netoutput format is ND in | ||||
| // final graph. | // final graph. | ||||
| if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) && | |||||
| if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) && | |||||
| ((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) { | ((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) { | ||||
| GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str()); | GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str()); | ||||
| auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr(); | auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr(); | ||||
| @@ -521,11 +522,37 @@ Status ModelBuilder::MergeWeights() { | |||||
| } | } | ||||
| if (weight_data.data() != nullptr) { | if (weight_data.data() != nullptr) { | ||||
| GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED); | GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED); | ||||
| GE_CHK_BOOL_EXEC( | |||||
| memcpy_s(base_addr + offset, weight_offset_ - offset, weight_data.data(), weight_data.size()) == EOK, | |||||
| return FAILED, "call memcpy_s failed."); | |||||
| if (weight_offset_ - offset < weight_data.size()) { | |||||
| GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", weight_offset_ - offset, | |||||
| weight_data.size()); | |||||
| return FAILED; | |||||
| } | |||||
| uintptr_t dst_ptr = (uintptr_t)base_addr + offset; | |||||
| uintptr_t src_ptr = (uintptr_t)weight_data.data(); | |||||
| size_t left_size = weight_data.size(); | |||||
| while (left_size > SECUREC_MEM_MAX_LEN) { | |||||
| auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr), | |||||
| SECUREC_MEM_MAX_LEN); | |||||
| if (err != EOK) { | |||||
| GELOGE(FAILED, | |||||
| "mem copy failed. errret:%u, " | |||||
| "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", | |||||
| err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); | |||||
| return FAILED; | |||||
| } | |||||
| left_size -= SECUREC_MEM_MAX_LEN; | |||||
| dst_ptr = dst_ptr + SECUREC_MEM_MAX_LEN; | |||||
| src_ptr = src_ptr + SECUREC_MEM_MAX_LEN; | |||||
| } | |||||
| auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), left_size, reinterpret_cast<void *>(src_ptr), left_size); | |||||
| if (err != EOK) { | |||||
| GELOGE(FAILED, | |||||
| "mem copy failed. errret:%u, " | |||||
| "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", | |||||
| err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); | |||||
| return FAILED; | |||||
| } | |||||
| } | } | ||||
| weight_data.clear(); | weight_data.clear(); | ||||
| } | } | ||||
| @@ -683,7 +683,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
| GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); | GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| stream_node_num_vec[stream_id]++; | |||||
| AddNodeNum(cur_node, stream_node_num_vec[stream_id]); | |||||
| stream_2_nodes_map[stream_id].push_back(cur_node); | stream_2_nodes_map[stream_id].push_back(cur_node); | ||||
| // The maximum number of tasks per stream. | // The maximum number of tasks per stream. | ||||
| int64_t max_node_num_one_stream = GetMaxNodeNumPerStream(cur_node, max_task_count); | int64_t max_node_num_one_stream = GetMaxNodeNumPerStream(cur_node, max_task_count); | ||||
| @@ -706,7 +706,8 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
| "It's time to split the stream, split newly-added stream id is %ld", | "It's time to split the stream, split newly-added stream id is %ld", | ||||
| stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); | stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); | ||||
| NodePtr pre_node = pre_node_vec[stream_id]; | NodePtr pre_node = pre_node_vec[stream_id]; | ||||
| stream_node_num_vec[stream_id] = 1; | |||||
| stream_node_num_vec[stream_id] = 0; | |||||
| AddNodeNum(cur_node, stream_node_num_vec[stream_id]); | |||||
| // try spilt a new stream and move same continuous stream label nodes from this stream | // try spilt a new stream and move same continuous stream label nodes from this stream | ||||
| bool not_use_cur = false; | bool not_use_cur = false; | ||||
| NodePtr not_cur = nullptr; | NodePtr not_cur = nullptr; | ||||
| @@ -720,7 +721,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||||
| auto stored_op_desc = node->GetOpDesc(); | auto stored_op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(stored_op_desc); | GE_CHECK_NOTNULL(stored_op_desc); | ||||
| stored_op_desc->SetStreamId(last_stream_id); | stored_op_desc->SetStreamId(last_stream_id); | ||||
| stream_node_num_vec[stream_id]++; | |||||
| AddNodeNum(node, stream_node_num_vec[stream_id]); | |||||
| } | } | ||||
| not_use_cur = true; | not_use_cur = true; | ||||
| not_cur = nodes.front(); | not_cur = nodes.front(); | ||||
| @@ -1055,7 +1056,7 @@ Status StreamAllocator::CollectDeactiveStream(const OpDescPtr &op_desc, std::set | |||||
| // Insert StreamActive Op for Entry Stream. | // Insert StreamActive Op for Entry Stream. | ||||
| Status StreamAllocator::InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id) { | Status StreamAllocator::InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id) { | ||||
| string node_name = "ActiveEntryStream_" + string(STREAMACTIVE); | |||||
| string node_name = whole_graph_->GetName() + "_ActiveEntryStream_" + string(STREAMACTIVE); | |||||
| OpDescPtr op_desc = ge::MakeShared<OpDesc>(node_name, STREAMACTIVE); | OpDescPtr op_desc = ge::MakeShared<OpDesc>(node_name, STREAMACTIVE); | ||||
| if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
| GELOGE(FAILED, "Failed to new opdesc."); | GELOGE(FAILED, "Failed to new opdesc."); | ||||
| @@ -1143,7 +1144,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
| GE_CHECK_NOTNULL(node->GetInControlAnchor()); | GE_CHECK_NOTNULL(node->GetInControlAnchor()); | ||||
| GE_CHECK_NOTNULL(node->GetOutControlAnchor()); | GE_CHECK_NOTNULL(node->GetOutControlAnchor()); | ||||
| for (auto &event_id : recv_event_id_list) { | for (auto &event_id : recv_event_id_list) { | ||||
| string recv_node_name = "_Recv_" + to_string(event_id); | |||||
| string recv_node_name = whole_graph_->GetName() + "_Recv_" + to_string(event_id); | |||||
| OpDescPtr op_desc_ptr = MakeShared<OpDesc>(recv_node_name, RECV); | OpDescPtr op_desc_ptr = MakeShared<OpDesc>(recv_node_name, RECV); | ||||
| GE_CHECK_NOTNULL(op_desc_ptr); | GE_CHECK_NOTNULL(op_desc_ptr); | ||||
| @@ -1171,7 +1172,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
| GetSendEventIdList(node, send_event_id_list); | GetSendEventIdList(node, send_event_id_list); | ||||
| for (auto &event_id : send_event_id_list) { | for (auto &event_id : send_event_id_list) { | ||||
| string send_node_name = "_Send_" + to_string(event_id); | |||||
| string send_node_name = whole_graph_->GetName() + "_Send_" + to_string(event_id); | |||||
| OpDescPtr op_desc_ptr = MakeShared<OpDesc>(send_node_name, SEND); | OpDescPtr op_desc_ptr = MakeShared<OpDesc>(send_node_name, SEND); | ||||
| GE_CHECK_NOTNULL(op_desc_ptr); | GE_CHECK_NOTNULL(op_desc_ptr); | ||||
| @@ -1291,6 +1292,15 @@ int64_t StreamAllocator::GetMaxNodeNumPerStream(const NodePtr &node, uint32_t ma | |||||
| return max_node_num_one_stream; | return max_node_num_one_stream; | ||||
| } | } | ||||
| void StreamAllocator::AddNodeNum(const NodePtr &node, int64_t &node_num) { | |||||
| node_num++; | |||||
| vector<uint32_t> events; | |||||
| GetSendEventIdList(node, events); | |||||
| node_num += static_cast<int64_t>(events.size()); | |||||
| GetRecvEventIdList(node, events); | |||||
| node_num += static_cast<int64_t>(events.size()); | |||||
| } | |||||
| // Insert send event id on a node | // Insert send event id on a node | ||||
| void StreamAllocator::AddSendEventId(const NodePtr &node, uint32_t event_id) { | void StreamAllocator::AddSendEventId(const NodePtr &node, uint32_t event_id) { | ||||
| node_to_send_events_[node].emplace_back(event_id); | node_to_send_events_[node].emplace_back(event_id); | ||||
| @@ -80,6 +80,7 @@ class StreamAllocator { | |||||
| Status GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stream_count, uint32_t &max_task_count); | Status GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stream_count, uint32_t &max_task_count); | ||||
| int64_t GetMaxNodeNumPerStream(const NodePtr &node, uint32_t max_node_num_one_stream); | int64_t GetMaxNodeNumPerStream(const NodePtr &node, uint32_t max_node_num_one_stream); | ||||
| void AddNodeNum(const NodePtr &node, int64_t &node_num); | |||||
| void AddSendEventId(const NodePtr &node, uint32_t event_id); | void AddSendEventId(const NodePtr &node, uint32_t event_id); | ||||
| void AddRecvEventId(const NodePtr &node, uint32_t event_id); | void AddRecvEventId(const NodePtr &node, uint32_t event_id); | ||||
| @@ -47,6 +47,7 @@ const char *const kIsOutputVar = "OUTPUT_IS_VAR"; | |||||
| const char *const kProfilingMode = "PROFILING_MODE"; | const char *const kProfilingMode = "PROFILING_MODE"; | ||||
| const char *const kProfilingFpPoint = "FP_POINT"; | const char *const kProfilingFpPoint = "FP_POINT"; | ||||
| const char *const kProfilingBpPoint = "BP_POINT"; | const char *const kProfilingBpPoint = "BP_POINT"; | ||||
| const char *const kOffOptimize = "off_optimize"; | |||||
| const uint32_t kProfilingArStep = 2; | const uint32_t kProfilingArStep = 2; | ||||
| const uint64_t kProfilingFpStartLogid = 1; | const uint64_t kProfilingFpStartLogid = 1; | ||||
| const uint64_t kProfilingBpEndLogid = 2; | const uint64_t kProfilingBpEndLogid = 2; | ||||
| @@ -83,10 +84,10 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| } | } | ||||
| Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
| if (is_unknown_shape) { | if (is_unknown_shape) { | ||||
| GELOGI("Beign to generate unknown shape task."); | |||||
| GELOGI("Beign to generate unknown shape task. Graph name is %s.", graph->GetName().c_str()); | |||||
| ret = GenerateUnknownShapeTask(run_context, graph, task_def_list, op_name_map); | ret = GenerateUnknownShapeTask(run_context, graph, task_def_list, op_name_map); | ||||
| } else { | } else { | ||||
| GELOGI("Beign to generate known shape task."); | |||||
| GELOGI("Beign to generate known shape task. Graph name is %s.", graph->GetName().c_str()); | |||||
| ret = GenerateTask(run_context, graph, task_def_list, op_name_map); | ret = GenerateTask(run_context, graph, task_def_list, op_name_map); | ||||
| } | } | ||||
| GE_DUMP(graph, "GenerateTaskAfter"); | GE_DUMP(graph, "GenerateTaskAfter"); | ||||
| @@ -108,7 +109,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| GELOGE(FAILED, "SetListStr failed."); | GELOGE(FAILED, "SetListStr failed."); | ||||
| return FAILED); | return FAILED); | ||||
| GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||||
| GELOGI("Generate task success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), | |||||
| op_name_map.size()); | op_name_map.size()); | ||||
| // Init and serialize model_task_def | // Init and serialize model_task_def | ||||
| @@ -130,7 +131,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| GELOGI("Get TaskInfo success. session_id=%lu", session_id); | |||||
| GELOGI("Get TaskInfo success. session id is %lu", session_id); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -253,7 +254,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | ||||
| return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed."); | |||||
| GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); | |||||
| ProfilingPoint profiling_point; | ProfilingPoint profiling_point; | ||||
| vector<uint32_t> all_reduce_nodes; | vector<uint32_t> all_reduce_nodes; | ||||
| GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | ||||
| @@ -263,9 +264,9 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
| GE_TIMESTAMP_CALLNUM_START(GenerateTask); | GE_TIMESTAMP_CALLNUM_START(GenerateTask); | ||||
| // map store fusion nodes | // map store fusion nodes | ||||
| map<int64_t, std::vector<NodePtr>> fusion_nodes; | map<int64_t, std::vector<NodePtr>> fusion_nodes; | ||||
| string buffer_optimize = "off_optimize"; | |||||
| string buffer_optimize = kOffOptimize; | |||||
| (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | ||||
| if (buffer_optimize != "off_optimize") { | |||||
| if (buffer_optimize != kOffOptimize) { | |||||
| GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | ||||
| } | } | ||||
| std::unordered_set<Node *> fusion_nodes_seen; | std::unordered_set<Node *> fusion_nodes_seen; | ||||
| @@ -371,7 +372,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG | |||||
| GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); | ||||
| return GE_CLI_GE_NOT_INITIALIZED; | return GE_CLI_GE_NOT_INITIALIZED; | ||||
| } | } | ||||
| GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed."); | |||||
| GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); | |||||
| ProfilingPoint profiling_point; | ProfilingPoint profiling_point; | ||||
| vector<uint32_t> all_reduce_nodes; | vector<uint32_t> all_reduce_nodes; | ||||
| GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); | ||||
| @@ -381,9 +382,9 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG | |||||
| GE_TIMESTAMP_CALLNUM_START(GenerateTask); | GE_TIMESTAMP_CALLNUM_START(GenerateTask); | ||||
| // map store fusion nodes | // map store fusion nodes | ||||
| map<int64_t, std::vector<NodePtr>> fusion_nodes; | map<int64_t, std::vector<NodePtr>> fusion_nodes; | ||||
| string buffer_optimize = "off_optimize"; | |||||
| string buffer_optimize = kOffOptimize; | |||||
| (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); | ||||
| if (buffer_optimize != "off_optimize") { | |||||
| if (buffer_optimize != kOffOptimize) { | |||||
| GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); | ||||
| } | } | ||||
| std::unordered_set<Node *> fusion_nodes_seen; | std::unordered_set<Node *> fusion_nodes_seen; | ||||
| @@ -392,7 +393,11 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG | |||||
| rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
| GE_CHK_RT_RET(rtStreamCreate(&stream, 0)); | GE_CHK_RT_RET(rtStreamCreate(&stream, 0)); | ||||
| run_context.stream = stream; | run_context.stream = stream; | ||||
| GE_CHK_RT_RET(rtModelBindStream(run_context.model, stream, 0)); | |||||
| if (rtModelBindStream(run_context.model, stream, 0) != RT_ERROR_NONE) { | |||||
| GELOGE(FAILED, "Call rt api failed."); | |||||
| GE_CHK_RT(rtStreamDestroy(stream)); | |||||
| return FAILED; | |||||
| } | |||||
| for (auto &node : graph->GetAllNodes()) { | for (auto &node : graph->GetAllNodes()) { | ||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| @@ -437,7 +442,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG | |||||
| size_t task_list_size_before = task_def_list.size(); | size_t task_list_size_before = task_def_list.size(); | ||||
| GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | ||||
| GELOGI("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | |||||
| GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | |||||
| name.c_str(), type.c_str(), op_id, stream_id); | name.c_str(), type.c_str(), op_id, stream_id); | ||||
| GE_TIMESTAMP_RESTART(GenerateTask); | GE_TIMESTAMP_RESTART(GenerateTask); | ||||
| auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list); | auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list); | ||||
| @@ -659,14 +664,15 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
| Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_single_stream) const { | Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_single_stream) const { | ||||
| vector<vector<OpDescPtr>> continuous_op_lists(1); | vector<vector<OpDescPtr>> continuous_op_lists(1); | ||||
| const set<string> label_op_types({LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX}); | |||||
| const set<string> separator_types( | |||||
| {LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, STREAMSWITCH, STREAMSWITCHN}); | |||||
| for (auto &op_desc : ops) { | for (auto &op_desc : ops) { | ||||
| bool attr_notask = false; | bool attr_notask = false; | ||||
| if (ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask) && attr_notask) { | if (ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask) && attr_notask) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| string op_type = op_desc->GetType(); | string op_type = op_desc->GetType(); | ||||
| if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || label_op_types.count(op_type) != 0)) { | |||||
| if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) { | |||||
| continuous_op_lists.emplace_back(vector<OpDescPtr>()); | continuous_op_lists.emplace_back(vector<OpDescPtr>()); | ||||
| } else { | } else { | ||||
| continuous_op_lists.back().emplace_back(op_desc); | continuous_op_lists.back().emplace_back(op_desc); | ||||
| @@ -727,7 +733,6 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| fp_op_desc = in_node_desc; | fp_op_desc = in_node_desc; | ||||
| } | } | ||||
| } | } | ||||
| GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| @@ -736,6 +741,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
| GELOGW("not find fp_op_desc."); | GELOGW("not find fp_op_desc."); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); | |||||
| for (auto &node : graph->GetAllNodes()) { | for (auto &node : graph->GetAllNodes()) { | ||||
| OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| @@ -86,6 +86,17 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->SetDynamicSize(model_id, batch_num); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "SetDynamicSize failed"); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void GraphExecutor::SetTrainFlag(bool is_train_graph) { train_graph_flag_ = is_train_graph; } | void GraphExecutor::SetTrainFlag(bool is_train_graph) { train_graph_flag_ = is_train_graph; } | ||||
| Status GraphExecutor::FreeInOutBuffer() { | Status GraphExecutor::FreeInOutBuffer() { | ||||
| @@ -476,7 +487,28 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::ve | |||||
| GELOGE(ret, "GetDynamicBatchInfo failed."); | GELOGE(ret, "GetDynamicBatchInfo failed."); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetCurShape(model_id, batch_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "GetCurShape failed"); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetModelAttr(model_id, dynamic_output_shape_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "GetModelAttr failed"); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -503,4 +535,43 @@ Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vecto | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetAIPPInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetOrigInputInfo(model_id, index, orig_input_info); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetOrigInputInfo failed."); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||||
| std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims) { | |||||
| auto model_manager = ge::ModelManager::GetInstance(); | |||||
| GE_CHECK_NOTNULL(model_manager); | |||||
| Status ret = model_manager->GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "GetAllAippInputOutputDims failed."); | |||||
| return ret; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -56,6 +56,8 @@ class GraphExecutor { | |||||
| Status SetGraphContext(GraphContextPtr graph_context_ptr); | Status SetGraphContext(GraphContextPtr graph_context_ptr); | ||||
| static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num); | |||||
| void SetTrainFlag(bool is_train_graph); | void SetTrainFlag(bool is_train_graph); | ||||
| const std::vector<InputOutputDescInfo> &GetOutputsDesc() const { return outputs_desc_; } | const std::vector<InputOutputDescInfo> &GetOutputsDesc() const { return outputs_desc_; } | ||||
| @@ -71,6 +73,8 @@ class GraphExecutor { | |||||
| vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | ||||
| std::vector<uint32_t> &output_formats); | std::vector<uint32_t> &output_formats); | ||||
| static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
| @@ -80,10 +84,17 @@ class GraphExecutor { | |||||
| /// | /// | ||||
| static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ||||
| static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||||
| static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | |||||
| static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | ||||
| vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
| std::vector<uint32_t> &input_formats, | std::vector<uint32_t> &input_formats, | ||||
| std::vector<uint32_t> &output_formats); | std::vector<uint32_t> &output_formats); | ||||
| static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||||
| static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims); | |||||
| private: | private: | ||||
| Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data, | Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data, | ||||
| @@ -98,7 +98,7 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| NodePtr cond_out_node = cond_graph->FindNode(NODE_NAME_NET_OUTPUT); | |||||
| NodePtr cond_out_node = cond_graph->FindFirstNodeMatchType(NETOUTPUT); | |||||
| GE_CHECK_NOTNULL(cond_out_node); | GE_CHECK_NOTNULL(cond_out_node); | ||||
| OpDescPtr cond_out_desc = cond_out_node->GetOpDesc(); | OpDescPtr cond_out_desc = cond_out_node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(cond_out_desc); | GE_CHECK_NOTNULL(cond_out_desc); | ||||
| @@ -0,0 +1,90 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/load/new_model_manager/aipp_utils.h" | |||||
| #include <string> | |||||
| #include "common/debug/log.h" | |||||
| #include "common/op/ge_op_utils.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/utils/attr_utils.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| namespace ge { | |||||
| #define AIPP_CONVERT_TO_AIPP_INFO(KEY) aipp_info.KEY = aipp_params->KEY() | |||||
| #define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \ | |||||
| do { \ | |||||
| if (aipp_params->KEY##_size() > 0) { \ | |||||
| aipp_info.KEY = aipp_params->KEY(INDEX); \ | |||||
| } \ | |||||
| } while (0) | |||||
| Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) { | |||||
| GE_CHECK_NOTNULL(aipp_params); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(input_format); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(src_image_size_w); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(src_image_size_h); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(crop); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_w); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_h); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(crop_size_w); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(crop_size_h); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(resize); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(resize_output_w); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(resize_output_h); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(padding); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(left_padding_size); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(right_padding_size); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(top_padding_size); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(bottom_padding_size); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(csc_switch); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(rbuv_swap_switch); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(ax_swap_switch); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(single_line_mode); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c0, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c1, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c2, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c0, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c1, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c2, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c0, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c1, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c2, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_0, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_1, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_2, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_0, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_1, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_2, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(mean_chn_0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(mean_chn_1); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(mean_chn_2); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(mean_chn_3); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(min_chn_0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(min_chn_1); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(min_chn_2); | |||||
| AIPP_CONVERT_TO_AIPP_INFO(min_chn_3); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_0, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_1, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_2, 0); | |||||
| AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_3, 0); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,48 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_ | |||||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_ | |||||
| #include <vector> | |||||
| #include "common/ge_inner_error_codes.h" | |||||
| #include "common/ge_types.h" | |||||
| #include "graph/op_desc.h" | |||||
| #include "proto/insert_op.pb.h" | |||||
| using std::vector; | |||||
| namespace ge { | |||||
| const uint32_t kAippOriginInputIndex = 0; | |||||
| const uint32_t kAippInfoNum = 6; | |||||
| const uint32_t kAippInfoFormat = 0; | |||||
| const uint32_t kAippInfoDataType = 1; | |||||
| const uint32_t kAippInfoTensorName = 2; | |||||
| const uint32_t kAippInfoTensorSize = 3; | |||||
| const uint32_t kAippInfoDimNum = 4; | |||||
| const uint32_t kAippInfoShape = 5; | |||||
| class AippUtils { | |||||
| public: | |||||
| AippUtils() = default; | |||||
| ~AippUtils() = default; | |||||
| static Status ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_ | |||||
| @@ -35,7 +35,6 @@ | |||||
| namespace { | namespace { | ||||
| const uint32_t kAicpuLoadFlag = 1; | const uint32_t kAicpuLoadFlag = 1; | ||||
| const uint32_t kAicpuUnloadFlag = 0; | const uint32_t kAicpuUnloadFlag = 0; | ||||
| const uint32_t kTimeBufferLen = 80; | |||||
| const char *const kDumpOutput = "output"; | const char *const kDumpOutput = "output"; | ||||
| const char *const kDumpInput = "input"; | const char *const kDumpInput = "input"; | ||||
| const char *const kDumpAll = "all"; | const char *const kDumpAll = "all"; | ||||
| @@ -190,18 +189,6 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin | |||||
| } | } | ||||
| } | } | ||||
| static std::string GetCurrentTime() { | |||||
| std::time_t now = std::time(nullptr); | |||||
| std::tm *ptm = std::localtime(&now); | |||||
| if (ptm == nullptr) { | |||||
| return ""; | |||||
| } | |||||
| char buffer[kTimeBufferLen] = {0}; | |||||
| // format: 20171122042550 | |||||
| std::strftime(buffer, kTimeBufferLen, "%Y%m%d%H%M%S", ptm); | |||||
| return std::string(buffer); | |||||
| } | |||||
| Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | ||||
| GELOGI("Start dump output"); | GELOGI("Start dump output"); | ||||
| if (inner_dump_info.is_task) { | if (inner_dump_info.is_task) { | ||||
| @@ -384,10 +371,9 @@ Status DataDumper::LoadDumpInfo() { | |||||
| } | } | ||||
| aicpu::dump::OpMappingInfo op_mapping_info; | aicpu::dump::OpMappingInfo op_mapping_info; | ||||
| std::string time_now = GetCurrentTime(); | |||||
| GELOGI("Time is %s now", time_now.c_str()); | |||||
| op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + time_now + "/" + | |||||
| std::to_string(device_id_) + "/"); | |||||
| auto dump_path = PropertiesManager::Instance().GetDumpOutputPath(); | |||||
| op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/"); | |||||
| op_mapping_info.set_model_name(model_name_); | op_mapping_info.set_model_name(model_name_); | ||||
| op_mapping_info.set_model_id(model_id_); | op_mapping_info.set_model_id(model_id_); | ||||
| op_mapping_info.set_flag(kAicpuLoadFlag); | op_mapping_info.set_flag(kAicpuLoadFlag); | ||||
| @@ -80,6 +80,7 @@ const uint32_t kOutputNum = 1; | |||||
| const uint32_t kTrueBranchStreamNum = 1; | const uint32_t kTrueBranchStreamNum = 1; | ||||
| const uint32_t kThreadNum = 16; | const uint32_t kThreadNum = 16; | ||||
| const uint32_t kAddrLen = sizeof(void *); | const uint32_t kAddrLen = sizeof(void *); | ||||
| const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; | |||||
| const int kDecimal = 10; | const int kDecimal = 10; | ||||
| const int kBytes = 8; | const int kBytes = 8; | ||||
| const uint32_t kDataMemAlignSizeCompare = 64; | const uint32_t kDataMemAlignSizeCompare = 64; | ||||
| @@ -579,6 +580,14 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| auto ret = DoTaskSink(); | auto ret = DoTaskSink(); | ||||
| GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); | GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); | ||||
| /// In zero copy model, if a aicpu operator is connected to the first or last layer, before model execution, | |||||
| /// the aicpu opertor needs to destroy history record, and update operator memory address. | |||||
| /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). | |||||
| if (MarkSpecifiedAicpuKernel() != SUCCESS) { | |||||
| GELOGE(FAILED, "Mark model with specified aicpu operators failed."); | |||||
| return FAILED; | |||||
| } | |||||
| // collect profiling for ge | // collect profiling for ge | ||||
| if (ProfilingManager::Instance().ProfilingOn()) { | if (ProfilingManager::Instance().ProfilingOn()) { | ||||
| std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | std::vector<ComputeGraphDescInfo> compute_graph_desc_info; | ||||
| @@ -593,6 +602,82 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Travel all nodes and determine if destruction is required. | |||||
| /// @return bool | |||||
| /// | |||||
| bool DavinciModel::IsAicpuKernelConnectSpecifiedLayer() { | |||||
| Graph graph = ge_model_->GetGraph(); | |||||
| ComputeGraphPtr compute_graph = GraphUtils::GetComputeGraph(graph); | |||||
| auto all_nodes = compute_graph->GetAllNodes(); | |||||
| for (auto &node : all_nodes) { | |||||
| GE_IF_BOOL_EXEC(node == nullptr, continue); | |||||
| OpDescPtr op_desc = node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||||
| int64_t imply_type = -1; | |||||
| (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, imply_type); | |||||
| if (imply_type != static_cast<int64_t>(domi::ImplyType::AI_CPU)) { | |||||
| continue; | |||||
| } | |||||
| GELOGD("Current operator imply type is %ld, name is %s.", imply_type, op_desc->GetName().c_str()); | |||||
| for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); | |||||
| auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); | |||||
| auto peer_node = peer_out_data_anchor->GetOwnerNode(); | |||||
| GE_IF_BOOL_EXEC(peer_node == nullptr, continue); | |||||
| auto peer_op_desc = peer_node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); | |||||
| if (IsDataOp(peer_op_desc->GetType())) { | |||||
| GELOGI("Mark specified aicpu operator connected to data."); | |||||
| return true; | |||||
| } | |||||
| } | |||||
| for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue); | |||||
| auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors(); | |||||
| for (auto &peer_in_data_anchor : peer_in_data_anchors) { | |||||
| GE_IF_BOOL_EXEC(peer_in_data_anchor == nullptr, continue); | |||||
| auto peer_node = peer_in_data_anchor->GetOwnerNode(); | |||||
| GE_IF_BOOL_EXEC(peer_node == nullptr, continue); | |||||
| auto peer_op_desc = peer_node->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); | |||||
| if (peer_op_desc->GetType() == NETOUTPUT) { | |||||
| GELOGI("Mark specified aicpu operator connected to netoutput."); | |||||
| return true; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief mark ge model with specified aicpu operators . | |||||
| /// @return Status | |||||
| /// | |||||
| Status DavinciModel::MarkSpecifiedAicpuKernel() { | |||||
| bool result = IsAicpuKernelConnectSpecifiedLayer(); | |||||
| if (!result) { | |||||
| // No aicpu operator needing destroy. | |||||
| GELOGD("No specified aicpu operator that connects to data or netoutput."); | |||||
| return SUCCESS; | |||||
| } | |||||
| bool ret = ge::AttrUtils::SetBool(ge_model_, kNeedDestroySpecifiedAicpuKernel, result); | |||||
| if (!ret) { | |||||
| GELOGW("Add attr[%s] in ge model failed, and may lead to specified aicpu operators destruction failure.", | |||||
| kNeedDestroySpecifiedAicpuKernel); | |||||
| } | |||||
| GELOGI("Mark ge model success, the model has specified aicpu operators, ge model name: %s.", | |||||
| ge_model_->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Travel all nodes and do some init. | /// @brief Travel all nodes and do some init. | ||||
| @@ -1002,8 +1087,6 @@ Status DavinciModel::BindInputQueue() { | |||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief definiteness queue schedule, bind input queue to task. | /// @brief definiteness queue schedule, bind input queue to task. | ||||
| /// @param [in] queue_id: input queue id from user. | /// @param [in] queue_id: input queue id from user. | ||||
| /// @param [in] addr: Data Op output tensor address. | |||||
| /// @param [in] size: Data Op output tensor size. | |||||
| /// @return: 0 for success / others for failed | /// @return: 0 for success / others for failed | ||||
| Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { | Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { | ||||
| GELOGI("Set CpuKernel model dequeue task enter."); | GELOGI("Set CpuKernel model dequeue task enter."); | ||||
| @@ -1266,10 +1349,76 @@ Status DavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batc | |||||
| } | } | ||||
| break; | break; | ||||
| } | } | ||||
| return SUCCESS; | |||||
| } | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get AIPP input info | |||||
| /// @param [in] index | |||||
| /// @param [out] aipp_info | |||||
| /// @return execute result | |||||
| /// | |||||
| Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| if (!data_op->HasAttr(ATTR_NAME_AIPP)) { | |||||
| GELOGE(GE_AIPP_NOT_EXIST, "GetAIPPInfo: there is not AIPP related with index %u.", index); | |||||
| return GE_AIPP_NOT_EXIST; | |||||
| } | |||||
| std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | |||||
| GE_CHECK_NOTNULL(aipp_params); | |||||
| ge::GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
| GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||||
| "Data node do not contain param aipp!"); | |||||
| GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | |||||
| GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||||
| data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank()); | |||||
| if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) { | |||||
| GELOGI("GetAIPPInfo, dynamic Aipp is not support to query temporarily."); | |||||
| return GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY; | |||||
| } | |||||
| GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info), | |||||
| "convert aipp params to aipp config info failed"); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num) { | |||||
| batch_size_.clear(); | |||||
| if (batch_num.empty()) { | |||||
| GELOGD("User has not set dynammic data"); | |||||
| } | |||||
| for (size_t i = 0; i < batch_num.size(); i++) { | |||||
| batch_size_.emplace_back(batch_num[i]); | |||||
| } | |||||
| } | |||||
| void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info) { | |||||
| if (batch_size_.empty()) { | |||||
| GELOGD("User does not set dynamic size"); | |||||
| } | |||||
| for (size_t i = 0; i < batch_size_.size(); i++) { | |||||
| GELOGI("Start to get current shape"); | |||||
| batch_info.emplace_back(batch_size_[i]); | |||||
| } | |||||
| } | |||||
| void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) { | |||||
| for (auto &op : output_op_list_) { | |||||
| if (op->GetType() != NETOUTPUT) { | |||||
| continue; | |||||
| } | |||||
| GELOGI("Start to get dynamic output dims attr"); | |||||
| if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | |||||
| GELOGD("Can not get dynamic output dims attr"); | |||||
| } | |||||
| } | |||||
| } | |||||
| Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | ||||
| vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
| std::vector<uint32_t> &input_formats, | std::vector<uint32_t> &input_formats, | ||||
| @@ -1299,7 +1448,7 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf | |||||
| } | } | ||||
| Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) { | Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) { | ||||
| for (std::size_t index = 0; index < data_op_list_.size(); ++index) { | |||||
| for (size_t index = 0; index < data_op_list_.size(); ++index) { | |||||
| InputOutputDescInfo input; | InputOutputDescInfo input; | ||||
| GE_CHECK_NOTNULL(data_op_list_[index]); | GE_CHECK_NOTNULL(data_op_list_[index]); | ||||
| GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0)); | GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0)); | ||||
| @@ -1495,7 +1644,14 @@ Status DavinciModel::SinkModelProfile() { | |||||
| // Model Header | // Model Header | ||||
| string name = this->Name(); | string name = this->Name(); | ||||
| int32_t name_len = name.size(); | int32_t name_len = name.size(); | ||||
| reporter_data.deviceId = device_id_; | |||||
| // phy device id | |||||
| uint32_t phy_device_id = 0; | |||||
| rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
| return FAILED; | |||||
| } | |||||
| reporter_data.deviceId = phy_device_id; | |||||
| reporter_data.data = (unsigned char *)&name_len; | reporter_data.data = (unsigned char *)&name_len; | ||||
| reporter_data.dataLen = sizeof(int32_t); | reporter_data.dataLen = sizeof(int32_t); | ||||
| GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", | GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", | ||||
| @@ -1671,7 +1827,13 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | |||||
| GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | ||||
| return FAILED, "Sink model tag memcpy error."); | return FAILED, "Sink model tag memcpy error."); | ||||
| // device id | // device id | ||||
| reporter_data.deviceId = device_id_; | |||||
| uint32_t phy_device_id = 0; | |||||
| rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
| return FAILED; | |||||
| } | |||||
| reporter_data.deviceId = phy_device_id; | |||||
| // Model Header | // Model Header | ||||
| string name = this->Name(); | string name = this->Name(); | ||||
| @@ -2637,8 +2799,10 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||||
| } | } | ||||
| if (input_size > op_size) { | if (input_size > op_size) { | ||||
| GELOGE(FAILED, "Input size [%u] can not be bigger than op size [%u]", input_size, op_size); | |||||
| return false; | |||||
| GELOGW( | |||||
| "Input size [%u] is bigger than om size need [%u]," | |||||
| "MAY cause inference result ERROR, please check model input", | |||||
| input_size, op_size); | |||||
| } | } | ||||
| bool is_dynamic_aipp = false; | bool is_dynamic_aipp = false; | ||||
| for (const auto &op_desc : data_op_list_) { | for (const auto &op_desc : data_op_list_) { | ||||
| @@ -2707,14 +2871,18 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp | |||||
| /// | /// | ||||
| Status DavinciModel::UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> &data_info, bool is_input, | Status DavinciModel::UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> &data_info, bool is_input, | ||||
| const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) { | const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) { | ||||
| string input_or_output = "input"; | |||||
| is_input ? input_or_output = "input" : input_or_output = "output"; | |||||
| if (blobs.size() != data_info.size()) { | if (blobs.size() != data_info.size()) { | ||||
| GELOGE(FAILED, "Blobs not match: blobs=%zu datas=%zu", blobs.size(), data_info.size()); | |||||
| GELOGE(FAILED, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", | |||||
| input_or_output.c_str(), data_info.size(), blobs.size()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| for (const auto &data : data_info) { | for (const auto &data : data_info) { | ||||
| if (data.first >= blobs.size()) { // check data index. | if (data.first >= blobs.size()) { // check data index. | ||||
| GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u", blobs.size(), data_info.size(), data.first); | |||||
| GELOGE(FAILED, "Verify %s data num failed: can not find No.%zu data, because user only feeds %zu", | |||||
| input_or_output.c_str(), data.first, blobs.size()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| int64_t size = data.second.first; // size of tensor. | int64_t size = data.second.first; // size of tensor. | ||||
| @@ -3262,7 +3430,7 @@ void DavinciModel::PushHcclStream(rtStream_t value) { | |||||
| void DavinciModel::CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap) { | void DavinciModel::CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap) { | ||||
| std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | ||||
| capacity_of_stream_.emplace_back(make_pair(stream, remain_cap)); | capacity_of_stream_.emplace_back(make_pair(stream, remain_cap)); | ||||
| }; | |||||
| } | |||||
| void DavinciModel::ReuseHcclFollowStream(int64_t remain_cap, int64_t &index) { | void DavinciModel::ReuseHcclFollowStream(int64_t remain_cap, int64_t &index) { | ||||
| std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_); | ||||
| @@ -3320,4 +3488,91 @@ Status DavinciModel::GetComputeGraphInfo(std::vector<ComputeGraphDescInfo> &comp | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { | |||||
| GELOGE(GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index); | |||||
| return GE_AIPP_NOT_EXIST; | |||||
| } | |||||
| vector<std::string> inputs; | |||||
| if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | |||||
| std::string input = inputs[kAippOriginInputIndex]; | |||||
| GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str()); | |||||
| std::vector<std::string> infos = ge::StringUtils::Split(input, ':'); | |||||
| if (infos.size() != kAippInfoNum) { | |||||
| GELOGW("origin input str is invalid."); | |||||
| } | |||||
| orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); | |||||
| orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); | |||||
| orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info) { | |||||
| GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str()); | |||||
| std::vector<std::string> infos = ge::StringUtils::Split(in_out_info, ':'); | |||||
| if (infos.size() != kAippInfoNum) { | |||||
| GELOGW("origin input str is invalid."); | |||||
| } | |||||
| dims_info.name = infos[kAippInfoTensorName]; | |||||
| dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal); | |||||
| dims_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); | |||||
| std::vector<std::string> dims = ge::StringUtils::Split(infos[kAippInfoShape], ','); | |||||
| for (const auto &dim : dims) { | |||||
| if (dim.empty()) { | |||||
| continue; | |||||
| } | |||||
| dims_info.dims.emplace_back(std::strtol(dim.c_str(), nullptr, kDecimal)); | |||||
| } | |||||
| } | |||||
| Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims) { | |||||
| GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
| OpDescPtr data_op = data_op_list_[index]; | |||||
| if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { | |||||
| GELOGE(GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index); | |||||
| return GE_AIPP_NOT_EXIST; | |||||
| } | |||||
| vector<std::string> inputs; | |||||
| if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | |||||
| GELOGI("GetAllAippInputOutputDims: Data: %s has %u related aippInfo.", data_op->GetName().c_str(), inputs.size()); | |||||
| for (auto it : inputs) { | |||||
| InputOutputDims input_info; | |||||
| ParseAIPPInfo(it, input_info); | |||||
| input_dims.emplace_back(input_info); | |||||
| GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str()); | |||||
| ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex); | |||||
| int64_t data_input_size; | |||||
| (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | |||||
| GELOGD( | |||||
| "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: %s, " | |||||
| "data_type: %s, shape: %s .", | |||||
| index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||||
| TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||||
| TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||||
| formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||||
| } | |||||
| } | |||||
| vector<std::string> outputs; | |||||
| if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { | |||||
| for (auto it : outputs) { | |||||
| InputOutputDims output_info; | |||||
| ParseAIPPInfo(it, output_info); | |||||
| output_dims.emplace_back(output_info); | |||||
| GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str()); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -34,6 +34,7 @@ | |||||
| #include "graph/load/new_model_manager/data_dumper.h" | #include "graph/load/new_model_manager/data_dumper.h" | ||||
| #include "graph/load/new_model_manager/data_inputer.h" | #include "graph/load/new_model_manager/data_inputer.h" | ||||
| #include "graph/load/new_model_manager/model_utils.h" | #include "graph/load/new_model_manager/model_utils.h" | ||||
| #include "graph/load/new_model_manager/aipp_utils.h" | |||||
| #include "graph/load/new_model_manager/zero_copy_task.h" | #include "graph/load/new_model_manager/zero_copy_task.h" | ||||
| #include "graph/model.h" | #include "graph/model.h" | ||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| @@ -294,6 +295,19 @@ class DavinciModel { | |||||
| /// | /// | ||||
| Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info); | Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info); | ||||
| void GetCurShape(std::vector<int64_t> &batch_info); | |||||
| void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get AIPP input info | |||||
| /// @param [in] index | |||||
| /// @param [out] aipp_info | |||||
| /// @return execute result | |||||
| /// | |||||
| Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Get model_id. | /// @brief Get model_id. | ||||
| @@ -407,6 +421,8 @@ class DavinciModel { | |||||
| void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args, | void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args, | ||||
| size_t size, size_t offset); | size_t size, size_t offset); | ||||
| void SetDynamicSize(const std::vector<uint64_t> &batch_num); | |||||
| bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | ||||
| void SetProfileTime(ModelProcStage stage, int64_t endTime = 0); | void SetProfileTime(ModelProcStage stage, int64_t endTime = 0); | ||||
| @@ -452,6 +468,10 @@ class DavinciModel { | |||||
| Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status UpdateKnownZeroCopyAddr(vector<void *> &io_addrs, uint32_t args_offset); | Status UpdateKnownZeroCopyAddr(vector<void *> &io_addrs, uint32_t args_offset); | ||||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | |||||
| Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims); | |||||
| private: | private: | ||||
| // memory address of weights | // memory address of weights | ||||
| uint8_t *weights_mem_base_; | uint8_t *weights_mem_base_; | ||||
| @@ -560,6 +580,10 @@ class DavinciModel { | |||||
| void UnbindTaskSinkStream(); | void UnbindTaskSinkStream(); | ||||
| bool IsAicpuKernelConnectSpecifiedLayer(); | |||||
| Status MarkSpecifiedAicpuKernel(); | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Travel all nodes and do some init. | /// @brief Travel all nodes and do some init. | ||||
| @@ -741,6 +765,8 @@ class DavinciModel { | |||||
| Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | ||||
| std::vector<ge::OutputTensorInfo> &outputs); | std::vector<ge::OutputTensorInfo> &outputs); | ||||
| void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); | |||||
| bool is_model_has_inited_; | bool is_model_has_inited_; | ||||
| uint32_t model_id_; | uint32_t model_id_; | ||||
| uint32_t runtime_model_id_; | uint32_t runtime_model_id_; | ||||
| @@ -856,6 +882,8 @@ class DavinciModel { | |||||
| void *args_host_ = nullptr; | void *args_host_ = nullptr; | ||||
| std::map<const void *, void *> knonw_input_data_info_; | std::map<const void *, void *> knonw_input_data_info_; | ||||
| std::map<const void *, void *> knonw_output_data_info_; | std::map<const void *, void *> knonw_output_data_info_; | ||||
| vector<uint64_t> batch_size_; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | ||||
| @@ -22,6 +22,8 @@ | |||||
| #include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
| #include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
| #include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "framework/common/util.h" | |||||
| #include "graph/load/new_model_manager/davinci_model.h" | #include "graph/load/new_model_manager/davinci_model.h" | ||||
| #include "graph/load/new_model_manager/davinci_model_parser.h" | #include "graph/load/new_model_manager/davinci_model_parser.h" | ||||
| #include "model/ge_root_model.h" | #include "model/ge_root_model.h" | ||||
| @@ -31,6 +33,7 @@ thread_local uint32_t device_count = 0; | |||||
| namespace { | namespace { | ||||
| const int kCmdParSize = 2; | const int kCmdParSize = 2; | ||||
| const int kDumpCmdPairSize = 2; | const int kDumpCmdPairSize = 2; | ||||
| const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; | |||||
| } // namespace | } // namespace | ||||
| std::shared_ptr<ModelManager> ModelManager::GetInstance() { | std::shared_ptr<ModelManager> ModelManager::GetInstance() { | ||||
| @@ -39,7 +42,10 @@ std::shared_ptr<ModelManager> ModelManager::GetInstance() { | |||||
| return instance_ptr; | return instance_ptr; | ||||
| } | } | ||||
| ModelManager::ModelManager() { max_model_id_ = 0; } | |||||
| ModelManager::ModelManager() { | |||||
| max_model_id_ = 0; | |||||
| session_id_bias_ = 0; | |||||
| } | |||||
| Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) { | Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) { | ||||
| STR_FWK_OP_KERNEL param_base = {}; | STR_FWK_OP_KERNEL param_base = {}; | ||||
| @@ -69,6 +75,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||||
| GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;) | GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;) | ||||
| uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr)); | uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr)); | ||||
| param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; | param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; | ||||
| // In the scene of loading once and running many times, the kernel needs to be destroyed many times, | |||||
| // and connot be removed from kernel map. | |||||
| } | } | ||||
| } | } | ||||
| @@ -213,6 +221,13 @@ Status ModelManager::SetDevice(int32_t deviceId) const { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHECK_NOTNULL(davinci_model); | |||||
| davinci_model->SetDynamicSize(batch_num); | |||||
| return SUCCESS; | |||||
| } | |||||
| ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const shared_ptr<ge::GeRootModel> &ge_root_model, | ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const shared_ptr<ge::GeRootModel> &ge_root_model, | ||||
| const shared_ptr<ModelListener> &listener) { | const shared_ptr<ModelListener> &listener) { | ||||
| auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); | auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); | ||||
| @@ -616,7 +631,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) { | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { | if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { | ||||
| dump_path += "/"; | |||||
| dump_path = dump_path + "/" + CurrentTimeInStr() + "/"; | |||||
| } | } | ||||
| GELOGI("dump status = %s.", dump_path.c_str()); | GELOGI("dump status = %s.", dump_path.c_str()); | ||||
| @@ -647,7 +662,6 @@ Status ModelManager::HandleDumpCommand(const Command &command) { | |||||
| Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { | Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { | ||||
| auto hybrid_model = GetHybridModel(model_id); | auto hybrid_model = GetHybridModel(model_id); | ||||
| if (hybrid_model != nullptr) { | if (hybrid_model != nullptr) { | ||||
| // TODO hybrid use dynamic memory allocation | |||||
| max_size = 0; | max_size = 0; | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -694,6 +708,20 @@ Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<st | |||||
| return davinci_model->GetDynamicBatchInfo(batch_info); | return davinci_model->GetDynamicBatchInfo(batch_info); | ||||
| } | } | ||||
| Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHECK_NOTNULL(davinci_model); | |||||
| davinci_model->GetCurShape(batch_info); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHECK_NOTNULL(davinci_model); | |||||
| davinci_model->GetModelAttr(dynamic_output_shape_info); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | ||||
| vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
| std::vector<uint32_t> &inputFormats, | std::vector<uint32_t> &inputFormats, | ||||
| @@ -705,6 +733,52 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||||
| return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | ||||
| } | } | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get AIPP info | |||||
| /// @param [in] model_id | |||||
| /// @param [in] index | |||||
| /// @param [out] aipp_info | |||||
| /// @return execute result | |||||
| /// | |||||
| Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", | |||||
| model_id); | |||||
| return davinci_model->GetAIPPInfo(index, aipp_info); | |||||
| } | |||||
| Status ModelManager::GenSessionId(uint64_t &session_id) { | |||||
| std::lock_guard<std::mutex> lock(session_id_create_mutex_); | |||||
| struct timeval tv; | |||||
| if (gettimeofday(&tv, nullptr) != 0) { | |||||
| GELOGE(INTERNAL_ERROR, "Failed to get current time."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us | |||||
| session_id_bias_++; | |||||
| // max bais 100. | |||||
| session_id_bias_ = session_id_bias_ % 100; | |||||
| session_id = session_id * 100 + session_id_bias_; | |||||
| GELOGD("Generate new session id: %lu.", session_id); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id) { | |||||
| GeModelPtr ge_model_current = davinci_model->GetGeModel(); | |||||
| GE_CHECK_NOTNULL(ge_model_current); | |||||
| if (!ge::AttrUtils::SetInt(ge_model_current, ge::MODEL_ATTR_SESSION_ID, static_cast<int64_t>(session_id))) { | |||||
| GELOGW("Set attr[%s] failed in updating session_id.", MODEL_ATTR_SESSION_ID.c_str()); | |||||
| } | |||||
| GELOGD("Update session id: %lu.", session_id); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | ||||
| void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | ||||
| GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, | GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, | ||||
| @@ -747,6 +821,15 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
| } | } | ||||
| davinci_model->SetDeviceId(device_id); | davinci_model->SetDeviceId(device_id); | ||||
| /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | |||||
| /// These session_ids come from the same model, so the values of session_id are the same. | |||||
| /// Update session_id for infer in load model to avoid the same session_id. | |||||
| uint64_t new_session_id; | |||||
| ret = GenSessionId(new_session_id); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed."); | |||||
| ret = UpdateSessionId(davinci_model, new_session_id); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed."); | |||||
| ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size); | ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "DavinciInit failed."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "DavinciInit failed."); | ||||
| @@ -805,9 +888,17 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | |||||
| /// These session_ids come from the same model, so the values of session_id are the same. | |||||
| /// Update session_id for infer in load model to avoid the same session_id. | |||||
| uint64_t new_session_id; | |||||
| ret = GenSessionId(new_session_id); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); | |||||
| ret = UpdateSessionId(davinci_model, new_session_id); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); | |||||
| GenModelId(&model_id); | GenModelId(&model_id); | ||||
| davinci_model->SetId(model_id); | davinci_model->SetId(model_id); | ||||
| davinci_model->SetSessionId(model_id); | |||||
| ret = davinci_model->SetQueIds(input_queue_ids, output_queue_ids); | ret = davinci_model->SetQueIds(input_queue_ids, output_queue_ids); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "set model queue ids failed."); | GELOGE(ret, "set model queue ids failed."); | ||||
| @@ -840,6 +931,22 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); | ||||
| GeModelPtr ge_model_current = davinci_model->GetGeModel(); | |||||
| bool need_destroy_aicpu_kernel = false; | |||||
| bool result = ge::AttrUtils::GetBool(ge_model_current, kNeedDestroySpecifiedAicpuKernel, need_destroy_aicpu_kernel); | |||||
| if (result && need_destroy_aicpu_kernel) { | |||||
| GELOGI("Get attr %s successfully, start to destroy specified aicpu kernel.", kNeedDestroySpecifiedAicpuKernel); | |||||
| // Zero copy is enabled by default, no need to judge. | |||||
| uint64_t session_id_davinci = davinci_model->GetSessionId(); | |||||
| uint32_t model_id_davinci = davinci_model->GetModelId(); | |||||
| Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci); | |||||
| if (status != SUCCESS) { | |||||
| GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci, | |||||
| model_id_davinci); | |||||
| } | |||||
| } | |||||
| Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data); | Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data); | ||||
| if (status == SUCCESS) { | if (status == SUCCESS) { | ||||
| GELOGI("Execute model %u success.", model_id); | GELOGI("Execute model %u success.", model_id); | ||||
| @@ -920,4 +1027,23 @@ void ModelManager::GenModelId(uint32_t *id) { | |||||
| std::lock_guard<std::mutex> lock(map_mutex_); | std::lock_guard<std::mutex> lock(map_mutex_); | ||||
| *id = ++max_model_id_; | *id = ++max_model_id_; | ||||
| } | } | ||||
| Status ModelManager::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetOrigInputInfo failed, invalid model_id is %u.", | |||||
| model_id); | |||||
| return davinci_model->GetOrigInputInfo(index, orig_input_info); | |||||
| } | |||||
| Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||||
| std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims) { | |||||
| std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
| GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||||
| "GetAllAippInputOutputDims failed, invalid model_id is %u.", model_id); | |||||
| return davinci_model->GetAllAippInputOutputDims(index, input_dims, output_dims); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -17,6 +17,7 @@ | |||||
| #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ | #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ | ||||
| #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ | #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ | ||||
| #include <model/ge_root_model.h> | |||||
| #include <pthread.h> | #include <pthread.h> | ||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| @@ -25,7 +26,6 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include <model/ge_root_model.h> | |||||
| #include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
| #include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
| #include "common/ge_types.h" | #include "common/ge_types.h" | ||||
| @@ -188,6 +188,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| /// | /// | ||||
| ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | ||||
| /// | |||||
| /// @ingroup ge | |||||
| /// @brief Get AIPP info | |||||
| /// @param [in] model_id | |||||
| /// @param [in] index | |||||
| /// @param [out] aipp_info | |||||
| /// @return execute result | |||||
| /// | |||||
| ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||||
| /// | /// | ||||
| /// @ingroup domi_ome | /// @ingroup domi_ome | ||||
| /// @brief set model input and output size zero copy | /// @brief set model input and output size zero copy | ||||
| @@ -202,8 +212,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| std::vector<uint32_t> &inputFormats, | std::vector<uint32_t> &inputFormats, | ||||
| std::vector<uint32_t> &outputFormats); | std::vector<uint32_t> &outputFormats); | ||||
| ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||||
| ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | |||||
| ge::Status SetDevice(int32_t deviceId) const; | ge::Status SetDevice(int32_t deviceId) const; | ||||
| ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num); | |||||
| /// | /// | ||||
| /// @ingroup domi_ome | /// @ingroup domi_ome | ||||
| /// @brief Get model according to given id | /// @brief Get model according to given id | ||||
| @@ -226,6 +242,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ||||
| ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||||
| ge::Status GenSessionId(uint64_t &session_id); | |||||
| ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||||
| std::vector<InputOutputDims> &output_dims); | |||||
| private: | private: | ||||
| /// | /// | ||||
| /// @ingroup domi_ome | /// @ingroup domi_ome | ||||
| @@ -253,6 +276,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| ge::Status DeleteModel(uint32_t id); | ge::Status DeleteModel(uint32_t id); | ||||
| void GenModelId(uint32_t *id); | void GenModelId(uint32_t *id); | ||||
| ge::Status UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id); | |||||
| std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_; | std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_; | ||||
| std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; | std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; | ||||
| @@ -260,6 +284,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| uint32_t max_model_id_; | uint32_t max_model_id_; | ||||
| std::mutex map_mutex_; | std::mutex map_mutex_; | ||||
| std::mutex sess_ids_mutex_; | std::mutex sess_ids_mutex_; | ||||
| std::mutex session_id_create_mutex_; | |||||
| uint64_t session_id_bias_; | |||||
| std::set<uint64_t> sess_ids_; | std::set<uint64_t> sess_ids_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -177,6 +177,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode | |||||
| rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM); | rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
| (void)rtStreamDestroy(stream); | |||||
| return RT_FAILED; | return RT_FAILED; | ||||
| } | } | ||||
| GELOGD("hccl_stream addr is=%p", stream); | GELOGD("hccl_stream addr is=%p", stream); | ||||
| @@ -67,6 +67,18 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| const auto &ext_info = kernel_ex_def.kernel_ext_info(); | |||||
| if (!ext_info.empty()) { | |||||
| auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return FAILED;) | |||||
| rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return FAILED;) | |||||
| } | |||||
| // 2.1 get loop cond variable for tensor array write | // 2.1 get loop cond variable for tensor array write | ||||
| uint64_t step_id_addr = 0; | uint64_t step_id_addr = 0; | ||||
| OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP); | OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP); | ||||
| @@ -77,7 +89,9 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| } | } | ||||
| } | } | ||||
| auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID; | |||||
| auto session_id = davinci_model_->GetSessionId(); | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; | |||||
| // 2.2 Collect aicpu kernel | // 2.2 Collect aicpu kernel | ||||
| uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID; | uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID; | ||||
| GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS, | GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS, | ||||
| @@ -97,8 +111,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = | fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = | ||||
| static_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_base_addr)); | static_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_base_addr)); | ||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; | fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; | ||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0; | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0; | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_); | |||||
| rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); | rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) | ||||
| @@ -149,8 +163,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = workspace_base_addr; | fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = workspace_base_addr; | ||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = input_output_addr; | fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = input_output_addr; | ||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; | fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; | ||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0; | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0; | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); | |||||
| fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_); | |||||
| // 4. Create session | // 4. Create session | ||||
| GE_CHECK_NOTNULL(ModelManager::GetInstance()); | GE_CHECK_NOTNULL(ModelManager::GetInstance()); | ||||
| @@ -291,6 +305,15 @@ Status KernelExTaskInfo::Release() { | |||||
| input_output_addr_ = nullptr; | input_output_addr_ = nullptr; | ||||
| } | } | ||||
| } | } | ||||
| if (ext_info_addr_ != nullptr) { | |||||
| rtError_t rt_ret = rtFree(ext_info_addr_); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret); | |||||
| ret = FAILED; | |||||
| } else { | |||||
| ext_info_addr_ = nullptr; | |||||
| } | |||||
| } | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -31,6 +31,7 @@ class KernelExTaskInfo : public TaskInfo { | |||||
| davinci_model_(nullptr), | davinci_model_(nullptr), | ||||
| kernel_buf_(nullptr), | kernel_buf_(nullptr), | ||||
| input_output_addr_(nullptr), | input_output_addr_(nullptr), | ||||
| ext_info_addr_(nullptr), | |||||
| dump_args_(nullptr) {} | dump_args_(nullptr) {} | ||||
| ~KernelExTaskInfo() override {} | ~KernelExTaskInfo() override {} | ||||
| @@ -64,6 +65,7 @@ class KernelExTaskInfo : public TaskInfo { | |||||
| DavinciModel *davinci_model_; | DavinciModel *davinci_model_; | ||||
| void *kernel_buf_; | void *kernel_buf_; | ||||
| void *input_output_addr_; | void *input_output_addr_; | ||||
| void *ext_info_addr_; | |||||
| void *dump_args_; | void *dump_args_; | ||||
| OpDescPtr op_desc_ = nullptr; | OpDescPtr op_desc_ = nullptr; | ||||
| uint32_t args_offset_ = 0; | uint32_t args_offset_ = 0; | ||||
| @@ -414,6 +414,7 @@ Status KernelTaskInfo::Release() { | |||||
| FreeRtMem(&custom_info_.output_descs); | FreeRtMem(&custom_info_.output_descs); | ||||
| FreeRtMem(&custom_info_.output_addrs); | FreeRtMem(&custom_info_.output_addrs); | ||||
| FreeRtMem(&custom_info_.attr_handle); | FreeRtMem(&custom_info_.attr_handle); | ||||
| FreeRtMem(&aicpu_ext_info_addr_); | |||||
| if (ctx_.argsOffset != nullptr) { | if (ctx_.argsOffset != nullptr) { | ||||
| delete[] ctx_.argsOffset; | delete[] ctx_.argsOffset; | ||||
| @@ -792,6 +793,16 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| } | } | ||||
| } | } | ||||
| auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||||
| const auto &ext_info = kernel_def.kernel_ext_info(); | |||||
| auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||||
| if (init_ret != SUCCESS) { | |||||
| GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
| return init_ret; | |||||
| } | |||||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||||
| aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size()); | |||||
| // malloc device memory for args | // malloc device memory for args | ||||
| rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| @@ -823,6 +834,24 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||||
| if (ext_info.empty()) { | |||||
| return SUCCESS; | |||||
| } | |||||
| auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return FAILED; | |||||
| } | |||||
| rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
| return FAILED; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_data_addrs, | Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_data_addrs, | ||||
| const std::vector<void *> &output_data_addrs, | const std::vector<void *> &output_data_addrs, | ||||
| const std::vector<::tagCcAICPUTensor> &input_descs, | const std::vector<::tagCcAICPUTensor> &input_descs, | ||||
| @@ -100,6 +100,8 @@ class KernelTaskInfo : public TaskInfo { | |||||
| Status InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def); | Status InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def); | ||||
| Status InitAicpuTaskExtInfo(const std::string &ext_info); | |||||
| Status StoreInputOutputTensor(const std::vector<void *> &input_data_addrs, | Status StoreInputOutputTensor(const std::vector<void *> &input_data_addrs, | ||||
| const std::vector<void *> &output_data_addrs, | const std::vector<void *> &output_data_addrs, | ||||
| const std::vector<::tagCcAICPUTensor> &input_descs, | const std::vector<::tagCcAICPUTensor> &input_descs, | ||||
| @@ -152,6 +154,9 @@ class KernelTaskInfo : public TaskInfo { | |||||
| DavinciModel *davinci_model_; | DavinciModel *davinci_model_; | ||||
| uint32_t args_offset_ = 0; | uint32_t args_offset_ = 0; | ||||
| // aicpu ext_info device mem | |||||
| void *aicpu_ext_info_addr_ = nullptr; | |||||
| // For super kernel | // For super kernel | ||||
| uint32_t skt_id_; | uint32_t skt_id_; | ||||
| std::string stub_func_name_; | std::string stub_func_name_; | ||||
| @@ -133,10 +133,11 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) | ||||
| rt_ret = | rt_ret = | ||||
| rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;) | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); | |||||
| GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) | |||||
| rt_ret = rtKernelConfigTransArg(hbm_nav_table_addr, sizeof(uint64_t), 0, &hbm_nav_table_addr_pys); | rt_ret = rtKernelConfigTransArg(hbm_nav_table_addr, sizeof(uint64_t), 0, &hbm_nav_table_addr_pys); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret); | ||||
| return FAILED;) | |||||
| GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) | |||||
| GELOGD("SKT: hbm_nav_table_addr %p, hbm_nav_table_addr_pys %p", hbm_nav_table_addr, hbm_nav_table_addr_pys); | GELOGD("SKT: hbm_nav_table_addr %p, hbm_nav_table_addr_pys %p", hbm_nav_table_addr, hbm_nav_table_addr_pys); | ||||
| // Create the necessary metadata for the super kernel | // Create the necessary metadata for the super kernel | ||||
| @@ -159,7 +160,8 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) | ||||
| rt_ret = | rt_ret = | ||||
| rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;) | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); | |||||
| GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) | |||||
| // Create the necessary metadata for the super kernel | // Create the necessary metadata for the super kernel | ||||
| h = new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim); | h = new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim); | ||||
| } | } | ||||
| @@ -41,20 +41,24 @@ | |||||
| #include "graph/ge_local_context.h" | #include "graph/ge_local_context.h" | ||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| #include "graph/manager/util/rt_context_util.h" | #include "graph/manager/util/rt_context_util.h" | ||||
| #include "graph/partition/dynamic_shape_partition.h" | |||||
| #include "graph/passes/addn_pass.h" | #include "graph/passes/addn_pass.h" | ||||
| #include "graph/passes/atomic_addr_clean_pass.h" | #include "graph/passes/atomic_addr_clean_pass.h" | ||||
| #include "graph/passes/cast_remove_pass.h" | #include "graph/passes/cast_remove_pass.h" | ||||
| #include "graph/passes/common_subexpression_elimination_pass.h" | #include "graph/passes/common_subexpression_elimination_pass.h" | ||||
| #include "graph/passes/compile_nodes_pass.h" | #include "graph/passes/compile_nodes_pass.h" | ||||
| #include "graph/passes/cond_remove_pass.h" | |||||
| #include "graph/passes/constant_folding_pass.h" | #include "graph/passes/constant_folding_pass.h" | ||||
| #include "graph/passes/constant_fuse_same_pass.h" | #include "graph/passes/constant_fuse_same_pass.h" | ||||
| #include "graph/passes/control_trigger_pass.h" | #include "graph/passes/control_trigger_pass.h" | ||||
| #include "graph/passes/ctrl_edge_transfer_pass.h" | |||||
| #include "graph/passes/dimension_adjust_pass.h" | #include "graph/passes/dimension_adjust_pass.h" | ||||
| #include "graph/passes/dimension_compute_pass.h" | |||||
| #include "graph/passes/flow_ctrl_pass.h" | #include "graph/passes/flow_ctrl_pass.h" | ||||
| #include "graph/passes/hccl_group_pass.h" | #include "graph/passes/hccl_group_pass.h" | ||||
| #include "graph/passes/hccl_memcpy_pass.h" | #include "graph/passes/hccl_memcpy_pass.h" | ||||
| #include "graph/passes/identity_pass.h" | |||||
| #include "graph/passes/identify_reference_pass.h" | #include "graph/passes/identify_reference_pass.h" | ||||
| #include "graph/passes/identity_pass.h" | |||||
| #include "graph/passes/iterator_op_pass.h" | #include "graph/passes/iterator_op_pass.h" | ||||
| #include "graph/passes/link_gen_mask_nodes_pass.h" | #include "graph/passes/link_gen_mask_nodes_pass.h" | ||||
| #include "graph/passes/merge_pass.h" | #include "graph/passes/merge_pass.h" | ||||
| @@ -63,10 +67,11 @@ | |||||
| #include "graph/passes/permute_pass.h" | #include "graph/passes/permute_pass.h" | ||||
| #include "graph/passes/prune_pass.h" | #include "graph/passes/prune_pass.h" | ||||
| #include "graph/passes/replace_with_empty_const_pass.h" | #include "graph/passes/replace_with_empty_const_pass.h" | ||||
| #include "graph/passes/reshape_remove_pass.h" | |||||
| #include "graph/passes/reshape_recovery_pass.h" | #include "graph/passes/reshape_recovery_pass.h" | ||||
| #include "graph/passes/reshape_remove_pass.h" | |||||
| #include "graph/passes/same_transdata_breadth_fusion_pass.h" | #include "graph/passes/same_transdata_breadth_fusion_pass.h" | ||||
| #include "graph/passes/subgraph_pass.h" | #include "graph/passes/subgraph_pass.h" | ||||
| #include "graph/passes/switch_data_edges_bypass.h" | |||||
| #include "graph/passes/switch_dead_branch_elimination.h" | #include "graph/passes/switch_dead_branch_elimination.h" | ||||
| #include "graph/passes/switch_logic_remove_pass.h" | #include "graph/passes/switch_logic_remove_pass.h" | ||||
| #include "graph/passes/switch_op_pass.h" | #include "graph/passes/switch_op_pass.h" | ||||
| @@ -76,14 +81,10 @@ | |||||
| #include "graph/passes/transop_symmetry_elimination_pass.h" | #include "graph/passes/transop_symmetry_elimination_pass.h" | ||||
| #include "graph/passes/transop_without_reshape_fusion_pass.h" | #include "graph/passes/transop_without_reshape_fusion_pass.h" | ||||
| #include "graph/passes/transpose_transdata_pass.h" | #include "graph/passes/transpose_transdata_pass.h" | ||||
| #include "graph/passes/dimension_compute_pass.h" | |||||
| #include "graph/passes/variable_op_pass.h" | #include "graph/passes/variable_op_pass.h" | ||||
| #include "graph/passes/variable_prepare_op_pass.h" | #include "graph/passes/variable_prepare_op_pass.h" | ||||
| #include "graph/passes/variable_ref_delete_op_pass.h" | #include "graph/passes/variable_ref_delete_op_pass.h" | ||||
| #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | ||||
| #include "graph/passes/cond_remove_pass.h" | |||||
| #include "graph/passes/ctrl_edge_transfer_pass.h" | |||||
| #include "graph/partition/dynamic_shape_partition.h" | |||||
| #include "graph/utils/tensor_adapter.h" | #include "graph/utils/tensor_adapter.h" | ||||
| #include "inc/pass_manager.h" | #include "inc/pass_manager.h" | ||||
| #include "init/gelib.h" | #include "init/gelib.h" | ||||
| @@ -369,14 +370,15 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
| GM_RUN_AND_DUMP("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, | GM_RUN_AND_DUMP("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, | ||||
| session_id); | session_id); | ||||
| GM_RUN_AND_DUMP("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph); | GM_RUN_AND_DUMP("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph); | ||||
| GM_RUN_AND_DUMP("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); | GM_RUN_AND_DUMP("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); | ||||
| GM_RUN_AND_DUMP("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); | GM_RUN_AND_DUMP("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); | ||||
| GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); | |||||
| if (IsTailingOptimization()) { | if (IsTailingOptimization()) { | ||||
| GM_RUN_AND_DUMP("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); | GM_RUN_AND_DUMP("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); | ||||
| } | } | ||||
| GM_RUN_AND_DUMP("Optimize1", OptimizeStage1, compute_graph); | GM_RUN_AND_DUMP("Optimize1", OptimizeStage1, compute_graph); | ||||
| GM_RUN_AND_DUMP("InferShape2", compute_graph->InferShapeInNeed); | GM_RUN_AND_DUMP("InferShape2", compute_graph->InferShapeInNeed); | ||||
| // TODO: to be delete | |||||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | ||||
| if (unknown_shape_skip != nullptr) { | if (unknown_shape_skip != nullptr) { | ||||
| PassManager graph_pass; | PassManager graph_pass; | ||||
| @@ -423,7 +425,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| ret = LoadGraph(ge_root_model, graph_node); | |||||
| if (!graph_node->IsAsync()) { | |||||
| ret = LoadGraph(ge_root_model, graph_node); | |||||
| } else { | |||||
| ret = LoadGraphAsync(ge_root_model, graph_node); | |||||
| } | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "LoadGraph Failed."); | GELOGE(ret, "LoadGraph Failed."); | ||||
| return ret; | return ret; | ||||
| @@ -432,7 +438,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||||
| var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); | var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); | ||||
| } else if (!graph_node->GetLoadFlag()) { | } else if (!graph_node->GetLoadFlag()) { | ||||
| GeRootModelPtr ge_root_model_ptr = graph_node->GetGeRootModel(); | GeRootModelPtr ge_root_model_ptr = graph_node->GetGeRootModel(); | ||||
| ret = LoadGraph(ge_root_model_ptr, graph_node); | |||||
| if (!graph_node->IsAsync()) { | |||||
| ret = LoadGraph(ge_root_model_ptr, graph_node); | |||||
| } else { | |||||
| ret = LoadGraphAsync(ge_root_model_ptr, graph_node); | |||||
| } | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "LoadGraph Failed."); | GELOGE(ret, "LoadGraph Failed."); | ||||
| return ret; | return ret; | ||||
| @@ -587,7 +597,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector<GeTenso | |||||
| GELOGI("[RunGraph] start to run graph, graph_id = %u, is_train_graph: %d", graph_id, GetTrainFlag()); | GELOGI("[RunGraph] start to run graph, graph_id = %u, is_train_graph: %d", graph_id, GetTrainFlag()); | ||||
| if (inputs.empty()) { | if (inputs.empty()) { | ||||
| GELOGI("[RunGraph] initilize sub graph has no inputs."); | |||||
| GELOGI("[RunGraph] initialize sub graph has no inputs"); | |||||
| } | } | ||||
| // find graph | // find graph | ||||
| @@ -689,7 +699,7 @@ Status GraphManager::GenerateInfershapeGraph(GraphId &graph_id) { | |||||
| } | } | ||||
| Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | ||||
| GeRootModelPtr &ge_root_model) { | |||||
| GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { | |||||
| GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | ||||
| if (inputs.empty()) { | if (inputs.empty()) { | ||||
| GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | ||||
| @@ -712,15 +722,10 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen | |||||
| GELOGE(GE_GRAPH_ALREADY_RUNNING, "[BuildGraph] graph already running, graph id = %u", graph_node->GetGraphId()); | GELOGE(GE_GRAPH_ALREADY_RUNNING, "[BuildGraph] graph already running, graph id = %u", graph_node->GetGraphId()); | ||||
| return GE_GRAPH_ALREADY_RUNNING; | return GE_GRAPH_ALREADY_RUNNING; | ||||
| } | } | ||||
| graph_node->SetAsync(async); | |||||
| // set graph's run flag | // set graph's run flag | ||||
| graph_node->SetRunFlag(true); | graph_node->SetRunFlag(true); | ||||
| struct timeval tv; | |||||
| if (gettimeofday(&tv, nullptr) != 0) { | |||||
| GELOGE(INTERNAL_ERROR, "get the time of day failed."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us | |||||
| ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | ||||
| graph_node->SetRunFlag(false); | graph_node->SetRunFlag(false); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -954,6 +959,9 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||||
| } | } | ||||
| options_.enable_print_op_pass = true; | options_.enable_print_op_pass = true; | ||||
| ret = ParseOption(options, ENABLE_PRINT_OP_PASS, options_.enable_print_op_pass); | ret = ParseOption(options, ENABLE_PRINT_OP_PASS, options_.enable_print_op_pass); | ||||
| options_.is_single_op = false; | |||||
| ret = ParseOption(options, SINGLE_OP_FLAG, options_.is_single_op); | |||||
| GE_IF_BOOL_EXEC(ret != SUCCESS, | GE_IF_BOOL_EXEC(ret != SUCCESS, | ||||
| GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.enablePrintOpPass value is invalid, must be 0 or 1."); | GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.enablePrintOpPass value is invalid, must be 0 or 1."); | ||||
| return GE_GRAPH_OPTIONS_INVALID); | return GE_GRAPH_OPTIONS_INVALID); | ||||
| @@ -1554,6 +1562,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
| GELOGI("get ge.exec.variable_acc failed. set default value."); | GELOGI("get ge.exec.variable_acc failed. set default value."); | ||||
| } | } | ||||
| PassManager after_merge_passes; | PassManager after_merge_passes; | ||||
| GE_CHK_STATUS_RET( | |||||
| after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); | |||||
| GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
| after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); | after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); | ||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", | ||||
| @@ -1579,8 +1589,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
| GE_IF_BOOL_EXEC(options == "default" || options == "1", GELOGI("turn on variable accelerator"); | GE_IF_BOOL_EXEC(options == "default" || options == "1", GELOGI("turn on variable accelerator"); | ||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::VariableOpPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::VariableOpPass", | ||||
| new (std::nothrow) VariableOpPass(&var_acc_ctrl_)))) | new (std::nothrow) VariableOpPass(&var_acc_ctrl_)))) | ||||
| GE_CHK_STATUS_RET( | |||||
| after_merge_passes.AddPass("OptimizeStage1_1::TransOpDepthFusionPass", new (std::nothrow) TransOpDepthFusionPass)) | |||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpWithoutReshapeFusionPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpWithoutReshapeFusionPass", | ||||
| new (std::nothrow) TransOpWithoutReshapeFusionPass)) | new (std::nothrow) TransOpWithoutReshapeFusionPass)) | ||||
| GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", | ||||
| @@ -1660,7 +1668,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
| GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret); | GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1688,10 +1695,6 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | ||||
| names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | ||||
| names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | ||||
| HcclGroupPass hccl_group_pass; | |||||
| if (IsTailingOptimization()) { | |||||
| names_to_passes.emplace_back("HcclGroupPass", &hccl_group_pass); | |||||
| } | |||||
| GE_TIMESTAMP_START(names_to_passes); | GE_TIMESTAMP_START(names_to_passes); | ||||
| ret = GEPass(compute_graph).Run(names_to_passes); | ret = GEPass(compute_graph).Run(names_to_passes); | ||||
| GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); | GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); | ||||
| @@ -1708,19 +1711,12 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| PassManager pass_for_control_attr_optimize; | PassManager pass_for_control_attr_optimize; | ||||
| if (options_.train_graph_flag) { | if (options_.train_graph_flag) { | ||||
| // TODO: to be delete | |||||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | ||||
| if (unknown_shape_skip == nullptr) { | if (unknown_shape_skip == nullptr) { | ||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass", | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass", | ||||
| new (std::nothrow) FlowCtrlPass)) | new (std::nothrow) FlowCtrlPass)) | ||||
| } | } | ||||
| } | } | ||||
| // TODO: to be delete | |||||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||||
| if (unknown_shape_skip == nullptr) { | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", | |||||
| new (std::nothrow) SubgraphPass)); | |||||
| } | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", | ||||
| new (std::nothrow) MultiBatchPass)) | new (std::nothrow) MultiBatchPass)) | ||||
| @@ -1739,6 +1735,14 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass", | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass", | ||||
| new (std::nothrow) AtomicAddrCleanPass)) | new (std::nothrow) AtomicAddrCleanPass)) | ||||
| const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||||
| if (unknown_shape_skip == nullptr) { | |||||
| // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and | |||||
| // graph-structure. So try not to add new pass after SubgraphPass. | |||||
| GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", | |||||
| new (std::nothrow) SubgraphPass)); | |||||
| } | |||||
| GE_TIMESTAMP_START(pass_for_control_attr_optimize); | GE_TIMESTAMP_START(pass_for_control_attr_optimize); | ||||
| ret = pass_for_control_attr_optimize.Run(compute_graph); | ret = pass_for_control_attr_optimize.Run(compute_graph); | ||||
| GE_TIMESTAMP_END(pass_for_control_attr_optimize, "OptimizeStage2::ControlAttrOptimize"); | GE_TIMESTAMP_END(pass_for_control_attr_optimize, "OptimizeStage2::ControlAttrOptimize"); | ||||
| @@ -1908,6 +1912,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G | |||||
| graph_node->SetRunFlag(false); | graph_node->SetRunFlag(false); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| graph_node->SetLoadFlag(true); | |||||
| ge_root_model->SetModelId(model_id_info.model_id); | ge_root_model->SetModelId(model_id_info.model_id); | ||||
| graph_node->SetGeRootModel(ge_root_model); | graph_node->SetGeRootModel(ge_root_model); | ||||
| } | } | ||||
| @@ -99,7 +99,8 @@ class GraphManager { | |||||
| /// @param [out] models build result | /// @param [out] models build result | ||||
| /// @return Status result of function | /// @return Status result of function | ||||
| /// | /// | ||||
| ge::Status BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, GeRootModelPtr &models); | |||||
| ge::Status BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, GeRootModelPtr &models, | |||||
| uint64_t session_id = 0, bool async = false); | |||||
| /// | /// | ||||
| /// @ingroup ge_graph | /// @ingroup ge_graph | ||||
| @@ -40,6 +40,7 @@ GraphNode::GraphNode(GraphId graph_id) | |||||
| compute_graph_(nullptr), | compute_graph_(nullptr), | ||||
| build_flag_(false), | build_flag_(false), | ||||
| load_flag_(false), | load_flag_(false), | ||||
| async_(false), | |||||
| ge_model_(nullptr), | ge_model_(nullptr), | ||||
| sem_(1) { | sem_(1) { | ||||
| graph_run_async_listener_ = MakeShared<RunAsyncListener>(); | graph_run_async_listener_ = MakeShared<RunAsyncListener>(); | ||||
| @@ -152,6 +152,9 @@ class GraphNode { | |||||
| bool GetRunFlag() const { return run_flag_; } | bool GetRunFlag() const { return run_flag_; } | ||||
| void SetRunFlag(bool flag) { run_flag_ = flag; } | void SetRunFlag(bool flag) { run_flag_ = flag; } | ||||
| bool IsAsync() const { return async_; } | |||||
| void SetAsync(bool flag) { async_ = flag; } | |||||
| void SetSubGraph(std::vector<SubGraphInfoPtr> &subgraph_ptr_list) { subgraph_ptr_list_ = subgraph_ptr_list; } | void SetSubGraph(std::vector<SubGraphInfoPtr> &subgraph_ptr_list) { subgraph_ptr_list_ = subgraph_ptr_list; } | ||||
| const std::vector<SubGraphInfoPtr> &GetAllSubGraph() const { return subgraph_ptr_list_; } | const std::vector<SubGraphInfoPtr> &GetAllSubGraph() const { return subgraph_ptr_list_; } | ||||
| @@ -181,6 +184,7 @@ class GraphNode { | |||||
| ComputeGraphPtr compute_graph_; | ComputeGraphPtr compute_graph_; | ||||
| bool build_flag_; | bool build_flag_; | ||||
| bool load_flag_; | bool load_flag_; | ||||
| bool async_; | |||||
| GeModelPtr ge_model_; | GeModelPtr ge_model_; | ||||
| GeRootModelPtr ge_root_model_; | GeRootModelPtr ge_root_model_; | ||||
| BlockingQueue<uint8_t> sem_; | BlockingQueue<uint8_t> sem_; | ||||
| @@ -239,6 +243,7 @@ struct GraphManagerOptions { | |||||
| bool local_fmk_op_flag; | bool local_fmk_op_flag; | ||||
| bool hcom_parallel; | bool hcom_parallel; | ||||
| bool enable_print_op_pass; | bool enable_print_op_pass; | ||||
| bool is_single_op; | |||||
| std::map<std::string, int> stream_max_parallel_num; | std::map<std::string, int> stream_max_parallel_num; | ||||
| std::string output_datatype; | std::string output_datatype; | ||||
| std::string original_model_file; | std::string original_model_file; | ||||
| @@ -247,7 +252,7 @@ struct GraphManagerOptions { | |||||
| : stream_num(1), | : stream_num(1), | ||||
| perf_level(domi::GEN_TASK_WITHOUT_FUSION), | perf_level(domi::GEN_TASK_WITHOUT_FUSION), | ||||
| encrypt_mode(-1), | encrypt_mode(-1), | ||||
| framework_type(domi::FMK_TYPE_T), | |||||
| framework_type(domi::TENSORFLOW), | |||||
| ek_file(""), | ek_file(""), | ||||
| cert_file(""), | cert_file(""), | ||||
| hw_key_file(""), | hw_key_file(""), | ||||
| @@ -263,6 +268,7 @@ struct GraphManagerOptions { | |||||
| local_fmk_op_flag(false), | local_fmk_op_flag(false), | ||||
| hcom_parallel(false), | hcom_parallel(false), | ||||
| enable_print_op_pass(true), | enable_print_op_pass(true), | ||||
| is_single_op(false), | |||||
| save_original_model("false") {} | save_original_model("false") {} | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -301,7 +301,7 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| int64_t MemResource::GetVarMemSize() const { return var_mem_size_; } | |||||
| uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; } | |||||
| void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; }; | void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; }; | ||||
| @@ -177,7 +177,7 @@ class MemResource { | |||||
| Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset); | Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset); | ||||
| int64_t GetVarMemSize() const; | |||||
| uint64_t GetVarMemSize() const; | |||||
| void UpdateVarMemSize(int64_t mem_size); | void UpdateVarMemSize(int64_t mem_size); | ||||
| @@ -31,6 +31,10 @@ class RtContextUtil { | |||||
| void AddrtContext(rtContext_t context); | void AddrtContext(rtContext_t context); | ||||
| const rtContext_t GetNormalModeContext() const { return before_prerun_ctx_; } | |||||
| void SetNormalModeContext(rtContext_t context) { before_prerun_ctx_ = context; } | |||||
| void DestroyrtContexts(); | void DestroyrtContexts(); | ||||
| RtContextUtil &operator=(const RtContextUtil &) = delete; | RtContextUtil &operator=(const RtContextUtil &) = delete; | ||||
| @@ -41,8 +45,8 @@ class RtContextUtil { | |||||
| ~RtContextUtil() {} | ~RtContextUtil() {} | ||||
| std::vector<rtContext_t> rtContexts_; | std::vector<rtContext_t> rtContexts_; | ||||
| rtContext_t before_prerun_ctx_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| #endif // GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_ | #endif // GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_ | ||||
| @@ -34,7 +34,7 @@ const char *const kAicoreEngine = "AIcoreEngine"; | |||||
| namespace ge { | namespace ge { | ||||
| GraphOptimize::GraphOptimize() | GraphOptimize::GraphOptimize() | ||||
| : optimize_type_(domi::FrameworkType::FMK_TYPE_T), | |||||
| : optimize_type_(domi::FrameworkType::TENSORFLOW), | |||||
| cal_config_(""), | cal_config_(""), | ||||
| insert_op_config_(""), | insert_op_config_(""), | ||||
| parse_out_node_(""), | parse_out_node_(""), | ||||
| @@ -73,7 +73,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { | |||||
| src_index_list.emplace_back(peer_out_anchor->GetIdx()); | src_index_list.emplace_back(peer_out_anchor->GetIdx()); | ||||
| node_op_desc->SetSrcName(src_name_list); | node_op_desc->SetSrcName(src_name_list); | ||||
| node_op_desc->SetSrcIndex(src_index_list); | node_op_desc->SetSrcIndex(src_index_list); | ||||
| GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::FMK_TYPE_T), | |||||
| GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::TENSORFLOW), | |||||
| ge::NodePtr peer_owner_node = peer_out_anchor->GetOwnerNode(); | ge::NodePtr peer_owner_node = peer_out_anchor->GetOwnerNode(); | ||||
| input_name_list.emplace_back( | input_name_list.emplace_back( | ||||
| peer_owner_node->GetName() + | peer_owner_node->GetName() + | ||||
| @@ -260,7 +260,7 @@ Status GraphOptimize::OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_ | |||||
| } | } | ||||
| Status GraphOptimize::SetOptions(const ge::GraphManagerOptions &options) { | Status GraphOptimize::SetOptions(const ge::GraphManagerOptions &options) { | ||||
| if (options.framework_type >= static_cast<int32_t>(domi::FrameworkType::FMK_TYPE_RESERVED)) { | |||||
| if (options.framework_type >= static_cast<int32_t>(domi::FrameworkType::FRAMEWORK_RESERVED)) { | |||||
| GELOGE(GE_GRAPH_OPTIONS_INVALID, "Optimize Type %d invalid.", options.framework_type); | GELOGE(GE_GRAPH_OPTIONS_INVALID, "Optimize Type %d invalid.", options.framework_type); | ||||
| return GE_GRAPH_OPTIONS_INVALID; | return GE_GRAPH_OPTIONS_INVALID; | ||||
| } | } | ||||
| @@ -293,7 +293,7 @@ void GraphOptimize::TranFrameOp(ComputeGraphPtr &compute_graph) { | |||||
| // set - framework_type | // set - framework_type | ||||
| // [No need to verify return value] | // [No need to verify return value] | ||||
| op->SetType("FrameworkOp"); | op->SetType("FrameworkOp"); | ||||
| if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::FMK_TYPE_T)) { | |||||
| if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::TENSORFLOW)) { | |||||
| GELOGW("TranFrameOp SetInt ATTR_NAME_FRAMEWORK_FWK_TYPE failed"); | GELOGW("TranFrameOp SetInt ATTR_NAME_FRAMEWORK_FWK_TYPE failed"); | ||||
| } | } | ||||
| } | } | ||||
| @@ -0,0 +1,397 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/optimize/optimizer/allreduce_fusion_pass.h" | |||||
| #include <string> | |||||
| #include "common/debug/log.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "common/types.h" | |||||
| #include "common/util.h" | |||||
| #include "graph/anchor.h" | |||||
| #include "graph/node.h" | |||||
| #include "graph/op_desc.h" | |||||
| #include "graph/utils/attr_utils.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "graph/utils/tensor_utils.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "hccl/base.h" | |||||
| #include "hccl/hcom.h" | |||||
| namespace ge { | |||||
| Status AllReducePass::Run(ge::ComputeGraphPtr graph) { | |||||
| GELOGI("FusionAllReducePass: start"); | |||||
| std::vector<NodePtr> fusionOps; | |||||
| std::vector<float> inputGradientSize; | |||||
| std::vector<float> inputGradientTime; | |||||
| static const float inputGradientSizeTemp = 0.0; | |||||
| static const float inputGradientTimeTemp = 0.0; | |||||
| // Get all nodes | |||||
| for (auto nodePtr : graph->GetDirectNode()) { | |||||
| GE_IF_BOOL_EXEC(nullptr == nodePtr, GELOGW("FusionAllReducePass: null node exists"); continue;); | |||||
| ge::OpDescPtr opDescPtr = nodePtr->GetOpDesc(); | |||||
| GE_IF_BOOL_EXEC(nullptr == opDescPtr, | |||||
| GELOGW("FusionAllReducePass: desc of node %s is null", nodePtr->GetName().c_str()); | |||||
| continue;) | |||||
| GE_IF_BOOL_EXEC(HCOMALLREDUCE == opDescPtr->GetType(), | |||||
| // the op is allreduce and fusion > 0, then run fusion | |||||
| std::int64_t hcom_fusion = 1; | |||||
| GE_IF_BOOL_EXEC(!ge::AttrUtils::GetInt(opDescPtr, HCOM_ATTR_FUSION, hcom_fusion), | |||||
| GELOGW("FusionAllReducePass: not get hcom_fusion from opDescPtr " | |||||
| "by HCOM_ATTR_FUSION")); | |||||
| GELOGI("after GetInt, hcom_fusion is :%ld", hcom_fusion); GE_IF_BOOL_EXEC( | |||||
| hcom_fusion > 0, fusionOps.push_back(nodePtr); inputGradientSize.push_back(inputGradientSizeTemp); | |||||
| inputGradientTime.push_back(inputGradientTimeTemp);)) | |||||
| } | |||||
| // The number of allredecue operator must be more than 1 | |||||
| GE_IF_BOOL_EXEC(1 >= fusionOps.size(), GELOGW("FusionAllReducePass NOT_CHANGED: the graph has " | |||||
| "%lu allreduce operator", | |||||
| fusionOps.size()); | |||||
| return NOT_CHANGED;); | |||||
| string group = "group"; | |||||
| u32 gradientNum = fusionOps.size(); | |||||
| string model_name_str = graph->GetName(); | |||||
| const char *model_name = model_name_str.c_str(); | |||||
| model_feature modelFeature{model_name, gradientNum, inputGradientSize.data(), inputGradientTime.data()}; | |||||
| u32 segmentNum = 0; | |||||
| u32 segmentIndex[HCCL_MAX_SEGMENT_NUM] = {}; | |||||
| // Call HCCL function: hcom_gradient_segment | |||||
| GELOGI("FusionAllReducePass: invoking hcom_get_split_strategy"); | |||||
| GE_IF_BOOL_EXEC(HCCL_SUCCESS != hcom_get_split_strategy(group.c_str(), &modelFeature, HCCL_MAX_SEGMENT_NUM, | |||||
| &segmentNum, segmentIndex), | |||||
| GELOGE(FAILED, "FusionAllReducePass FAILED: the graph has %lu allreduce operator", fusionOps.size()); | |||||
| return FAILED;) | |||||
| GELOGI("FusionAllReducePass: invoke hcom_get_split_strategy successfully"); | |||||
| // check whether segmentNum is legal or not | |||||
| GE_IF_BOOL_EXEC((HCCL_MAX_SEGMENT_NUM < segmentNum || 1 > segmentNum || segmentNum > gradientNum), | |||||
| GELOGE(FAILED, | |||||
| "FusionAllReducePass FAILED: illegal segmentNum=%u, " | |||||
| "HCCL_MAX_SEGMENT_NUM=%u, gradientNum=%u", | |||||
| segmentNum, HCCL_MAX_SEGMENT_NUM, gradientNum); | |||||
| return FAILED;); | |||||
| // check whether segmentIndex is legal or not | |||||
| GE_IF_BOOL_EXEC((segmentIndex[segmentNum - 1] != gradientNum - 1), | |||||
| GELOGE(FAILED, | |||||
| "FusionAllReducePass FAILED: illegal segmentIndex[0]=%u, " | |||||
| "segmentIndex[segmentNum-1]=%u, gradientNum=%u", | |||||
| segmentIndex[0], segmentIndex[(segmentNum)-1], gradientNum); | |||||
| return FAILED;); | |||||
| for (uint32_t i = 0; i < segmentNum - 1; i++) { | |||||
| GE_IF_BOOL_EXEC(segmentIndex[i] >= segmentIndex[i + 1], GELOGE(FAILED, | |||||
| "FusionAllReducePass FAILED: illegal " | |||||
| "segmentIndex[%u]=%u, segmentIndex[%u]=%u", | |||||
| i, segmentIndex[i], i + 1, segmentIndex[i + 1]); | |||||
| return FAILED;); | |||||
| } | |||||
| // check whether fusion is needed or not | |||||
| GE_IF_BOOL_EXEC( | |||||
| segmentNum == gradientNum, | |||||
| GELOGE(NOT_CHANGED, "FusionAllReducePass NOT_CHANGED: segmentNum=%u, gradientNum=%u", segmentNum, gradientNum); | |||||
| return NOT_CHANGED;) | |||||
| std::unordered_set<void *> anchorPtrSet; | |||||
| std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataAnchor; | |||||
| std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataToInControl; | |||||
| std::vector<ge::OutControlAnchorPtr> fusionOpPeerOutControlAnchor; | |||||
| std::vector<std::pair<int, ge::InDataAnchorPtr>> fusionOpPeerInDataAnchor; | |||||
| std::vector<std::pair<int, ge::InControlAnchorPtr>> fusionOpPeerInControlFromOutData; | |||||
| std::vector<ge::InControlAnchorPtr> fusionOpPeerInControlAnchor; | |||||
| ge::OutControlAnchorPtr previousNewAllreduceOutControlAnchor = nullptr; | |||||
| // Traversing the segmentNum | |||||
| uint32_t start = 0; | |||||
| uint32_t end = 0; | |||||
| for (uint32_t segmentIdx = 0; segmentIdx < segmentNum; segmentIdx++) { | |||||
| end = segmentIndex[segmentIdx]; | |||||
| GE_IF_BOOL_EXEC(end - start < 1, | |||||
| GELOGI("FusionAllReducePass: segmentIndex[%u]=%u", segmentIdx, segmentIndex[segmentIdx]); | |||||
| start = end + 1; continue;); | |||||
| ge::OpDescPtr originDescPtr = fusionOps[start]->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(originDescPtr); | |||||
| ge::OpDescPtr newAllreduceDesc = AttrUtils::CloneOpDesc(originDescPtr); | |||||
| GE_CHECK_NOTNULL(newAllreduceDesc); | |||||
| // Cleat buffer | |||||
| anchorPtrSet.clear(); | |||||
| fusionOpPeerOutDataAnchor.clear(); | |||||
| fusionOpPeerOutDataToInControl.clear(); | |||||
| fusionOpPeerOutControlAnchor.clear(); | |||||
| fusionOpPeerInDataAnchor.clear(); | |||||
| fusionOpPeerInControlFromOutData.clear(); | |||||
| fusionOpPeerInControlAnchor.clear(); | |||||
| // Traversing the Allreduce operators of each group | |||||
| int outDataAnchorIndex = 0; | |||||
| GE_CHK_STATUS_RET(GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[start]), | |||||
| "Get peer outDataAnchor to inDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerInAnchorToOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData, | |||||
| fusionOps[start]), | |||||
| "Get peer inDataAnchor and inControlAnchor to outDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[start]), | |||||
| "Get peer outDataAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[start]), | |||||
| "Get peer outControlAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[start]), | |||||
| "Get peer outControlAnchor from inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[start]), "FusionAllReducePass FAILED: remove node %s\n.", | |||||
| fusionOps[start]->GetName().c_str()); | |||||
| for (uint32_t idx = start + 1; idx <= end; idx++) { | |||||
| GE_CHK_STATUS_RET( | |||||
| GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[idx], newAllreduceDesc), | |||||
| "Get peer outDataAnchor to inDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[idx]), | |||||
| "Get peer outDataAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[idx]), | |||||
| "Get peer outControlAnchor to inControlAnchor failed"); | |||||
| GE_CHK_STATUS_RET( | |||||
| GetPeerAnchorFromOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData, | |||||
| fusionOps[idx], newAllreduceDesc, outDataAnchorIndex), | |||||
| "Get peerAnchor from outDataAnchor failed"); | |||||
| GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[idx]), | |||||
| "Get peer outControlAnchor from inControlAnchor failed"); | |||||
| // Delete the node | |||||
| GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[idx]), "FusionAllReducePass FAILED: remove node %s\n.", | |||||
| fusionOps[idx]->GetName().c_str()); | |||||
| } | |||||
| NodePtr newAllReducePtr = graph->AddNode(newAllreduceDesc); | |||||
| GE_CHECK_NOTNULL(newAllReducePtr); | |||||
| // Link the inputDataAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerOutDataAnchor.size(); i++) { | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(fusionOpPeerOutDataAnchor[i], newAllReducePtr->GetInDataAnchor(static_cast<int>(i))), | |||||
| "FusionAllReducePass FAILED: add input data edge failed"); | |||||
| } | |||||
| // Link the inputControlAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerOutControlAnchor.size(); i++) { | |||||
| GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutControlAnchor[i], newAllReducePtr->GetInControlAnchor()), | |||||
| "FusionAllReducePass FAILED: add input control edge failed"); | |||||
| } | |||||
| for (uint32_t i = 0; i < fusionOpPeerOutDataToInControl.size(); i++) { | |||||
| GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutDataToInControl[i], newAllReducePtr->GetInControlAnchor()), | |||||
| "FusionAllReducePass FAILED: add edge from out data to incontrol " | |||||
| "failed"); | |||||
| } | |||||
| // Link the outputDataAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerInDataAnchor.size(); i++) { | |||||
| auto peerInDataAnchor = fusionOpPeerInDataAnchor[i].second; | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInDataAnchor[i].first), peerInDataAnchor), | |||||
| "FusionAllReducePass FAILED: add output data edge failed"); | |||||
| } | |||||
| for (uint32_t i = 0; i < fusionOpPeerInControlFromOutData.size(); i++) { | |||||
| auto peerInControlAnchor = fusionOpPeerInControlFromOutData[i].second; | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInControlFromOutData[i].first), | |||||
| peerInControlAnchor), | |||||
| "FusionAllReducePass FAILED: add edge from out data to in control " | |||||
| "failed"); | |||||
| } | |||||
| // Link the outputControlAnchor | |||||
| for (uint32_t i = 0; i < fusionOpPeerInControlAnchor.size(); i++) { | |||||
| GE_CHK_STATUS_RET(GraphUtils::AddEdge(newAllReducePtr->GetOutControlAnchor(), fusionOpPeerInControlAnchor[i]), | |||||
| "FusionAllReducePass FAILED: add output control edge failed"); | |||||
| } | |||||
| // Link the newAllreduce | |||||
| if (segmentIdx > 0 && previousNewAllreduceOutControlAnchor != nullptr) { | |||||
| GE_CHK_STATUS_RET( | |||||
| GraphUtils::AddEdge(previousNewAllreduceOutControlAnchor, newAllReducePtr->GetInControlAnchor()), | |||||
| "FusionAllReducePass FAILED: add input previous control edge failed"); | |||||
| } | |||||
| previousNewAllreduceOutControlAnchor = newAllReducePtr->GetOutControlAnchor(); | |||||
| start = end + 1; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| for (auto inDataAnchor : srcNodePtr->GetAllInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(inDataAnchor == nullptr, continue;); | |||||
| OutDataAnchorPtr peerOutDataAnchor = inDataAnchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peerOutDataAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutDataAnchor.get()) == 0) { | |||||
| peerOutDataAnchorVec.push_back(peerOutDataAnchor); | |||||
| anchorSet.insert(peerOutDataAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataAnchor, inDataAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerInAnchorToOutData( | |||||
| std::unordered_set<void *> &anchorSet, std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor, | |||||
| std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr) { | |||||
| for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;); | |||||
| for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerInDataAnchor.get()) == 0) { | |||||
| std::pair<int, ge::InDataAnchorPtr> pairPeerInDataAnchor; | |||||
| pairPeerInDataAnchor.first = 0; | |||||
| pairPeerInDataAnchor.second = peerInDataAnchor; | |||||
| fusionOpPeerInDataAnchor.push_back(pairPeerInDataAnchor); | |||||
| anchorSet.insert(peerInDataAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor)); | |||||
| } | |||||
| } | |||||
| for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;); | |||||
| if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) { | |||||
| std::pair<uint32_t, ge::InControlAnchorPtr> pairPeerInControlAnchorFromData; | |||||
| pairPeerInControlAnchorFromData.first = 0; | |||||
| pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData; | |||||
| fusionOpPeerInControlFromOutData.push_back(pairPeerInControlAnchorFromData); | |||||
| anchorSet.insert(peerInControlAnchorFromData.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData)); | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, | |||||
| ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr) { | |||||
| for (auto inDataAnchor : srcNodePtr->GetAllInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(inDataAnchor == nullptr, continue;); | |||||
| OutDataAnchorPtr peerOutDataAnchor = inDataAnchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peerOutDataAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutDataAnchor.get()) == 0) { | |||||
| peerOutDataAnchorVec.push_back(peerOutDataAnchor); | |||||
| anchorSet.insert(peerOutDataAnchor.get()); | |||||
| if (dstOpDescPtr->AddInputDesc(inDataAnchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(inDataAnchor->GetIdx())) != | |||||
| ge::GRAPH_SUCCESS) { | |||||
| GELOGW("GetPeerOutDataToInData: AddInputDesc failed"); | |||||
| } | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataAnchor, inDataAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor(); | |||||
| GE_CHECK_NOTNULL(inControlAnchor); | |||||
| for (auto peerOutDataToInControl : inControlAnchor->GetPeerOutDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerOutDataToInControl == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutDataToInControl.get()) == 0) { | |||||
| peerOutDataToInControlVec.push_back(peerOutDataToInControl); | |||||
| anchorSet.insert(peerOutDataToInControl.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataToInControl, inControlAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor(); | |||||
| GE_CHECK_NOTNULL(inControlAnchor); | |||||
| for (auto peerOutControlAnchor : inControlAnchor->GetPeerOutControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerOutControlAnchor == nullptr, continue;); | |||||
| if (anchorSet.count(peerOutControlAnchor.get()) == 0) { | |||||
| peerOutControlToInControlVec.push_back(peerOutControlAnchor); | |||||
| anchorSet.insert(peerOutControlAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutControlAnchor, inControlAnchor)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerAnchorFromOutData( | |||||
| std::unordered_set<void *> &anchorSet, vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec, | |||||
| vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, ge::NodePtr &srcNodePtr, | |||||
| ge::OpDescPtr &dstOpDescPtr, int &index) { | |||||
| for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;) | |||||
| if (outDataAnchor->GetPeerInDataAnchors().size() > 0 || outDataAnchor->GetPeerInControlAnchors().size() > 0) { | |||||
| if (dstOpDescPtr->AddOutputDesc( | |||||
| outDataAnchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(outDataAnchor->GetIdx())) != ge::GRAPH_SUCCESS) { | |||||
| GELOGW("GetPeerAnchorFromOutData: AddOutputDesc failed"); | |||||
| } | |||||
| index++; | |||||
| } | |||||
| for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;) | |||||
| if (anchorSet.count(peerInDataAnchor.get()) == 0) { | |||||
| std::pair<int, ge::InDataAnchorPtr> pairPeerInDataAnchor; | |||||
| pairPeerInDataAnchor.first = index; | |||||
| pairPeerInDataAnchor.second = peerInDataAnchor; | |||||
| peerInDataFromOutDataVec.push_back(pairPeerInDataAnchor); | |||||
| anchorSet.insert(peerInDataAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor)) | |||||
| } | |||||
| } | |||||
| for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;) | |||||
| if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) { | |||||
| std::pair<int, ge::InControlAnchorPtr> pairPeerInControlAnchorFromData; | |||||
| pairPeerInControlAnchorFromData.first = index; | |||||
| pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData; | |||||
| peerInControlFromOutDataVec.push_back(pairPeerInControlAnchorFromData); | |||||
| anchorSet.insert(peerInControlAnchorFromData.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData)) | |||||
| } | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AllReducePass::GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec, | |||||
| ge::NodePtr &srcNodePtr) { | |||||
| OutControlAnchorPtr outControlAnchor = srcNodePtr->GetOutControlAnchor(); | |||||
| GE_CHECK_NOTNULL(outControlAnchor); | |||||
| for (auto peerInControlAnchor : outControlAnchor->GetPeerInControlAnchors()) { | |||||
| GE_IF_BOOL_EXEC(peerInControlAnchor == nullptr, continue;) | |||||
| if (anchorSet.count(peerInControlAnchor.get()) == 0) { | |||||
| peerInControlFromOutControlVec.push_back(peerInControlAnchor); | |||||
| anchorSet.insert(peerInControlAnchor.get()); | |||||
| GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outControlAnchor, peerInControlAnchor)) | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,55 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||||
| #define GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||||
| #include <unordered_set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "inc/graph_pass.h" | |||||
| namespace ge { | |||||
| // | |||||
| class AllReducePass : public GraphPass { | |||||
| public: | |||||
| Status Run(ge::ComputeGraphPtr graph) override; | |||||
| private: | |||||
| Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr, | |||||
| ge::OpDescPtr &dstOpDescPtr); | |||||
| Status GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec, ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec, | |||||
| ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerAnchorFromOutData(std::unordered_set<void *> &anchorSet, | |||||
| vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec, | |||||
| vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, | |||||
| ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr, int &index); | |||||
| Status GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet, | |||||
| vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec, | |||||
| ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||||
| std::vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr); | |||||
| Status GetPeerInAnchorToOutData(std::unordered_set<void *> &anchorSet, | |||||
| std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor, | |||||
| std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData, | |||||
| ge::NodePtr &srcNodePtr); | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||||
| @@ -745,7 +745,8 @@ Status Cluster::BuildPartitionSubgraph() { | |||||
| } | } | ||||
| int64_t parent_node_index = 0; | int64_t parent_node_index = 0; | ||||
| for (auto anchor : inputs_) { | for (auto anchor : inputs_) { | ||||
| auto data_op = MakeShared<OpDesc>(std::string("Data_") + std::to_string(parent_node_index), ge::DATA); | |||||
| auto data_op = | |||||
| MakeShared<OpDesc>(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA); | |||||
| REQUIRE_NOT_NULL(data_op, "Failed new memory for data op."); | REQUIRE_NOT_NULL(data_op, "Failed new memory for data op."); | ||||
| auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx()); | auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx()); | ||||
| REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc."); | REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc."); | ||||
| @@ -763,7 +764,7 @@ Status Cluster::BuildPartitionSubgraph() { | |||||
| if (outputs_.empty() && control_outputs_.empty()) { | if (outputs_.empty() && control_outputs_.empty()) { | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| auto net_output_op = MakeShared<OpDesc>(NODE_NAME_NET_OUTPUT, ge::NETOUTPUT); | |||||
| auto net_output_op = MakeShared<OpDesc>(subgraph_->GetName() + "_" + NODE_NAME_NET_OUTPUT, ge::NETOUTPUT); | |||||
| REQUIRE_NOT_NULL(net_output_op, "Failed new memory for netoutput op."); | REQUIRE_NOT_NULL(net_output_op, "Failed new memory for netoutput op."); | ||||
| for (size_t i = 0; i < outputs_.size(); ++i) { | for (size_t i = 0; i < outputs_.size(); ++i) { | ||||
| GeTensorDesc input_desc; | GeTensorDesc input_desc; | ||||
| @@ -300,11 +300,9 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||||
| GE_CHECK_NOTNULL(end_graph); | GE_CHECK_NOTNULL(end_graph); | ||||
| const auto &src_node = out_anchor->GetOwnerNode(); | const auto &src_node = out_anchor->GetOwnerNode(); | ||||
| const auto &dst_node = peer_in_anchor->GetOwnerNode(); | const auto &dst_node = peer_in_anchor->GetOwnerNode(); | ||||
| string engine_end_name; | |||||
| string engine_pld_name; | |||||
| // link input -> end | // link input -> end | ||||
| string end_name = kEndType + std::to_string(graph_info_.num_of_pld_end_); | string end_name = kEndType + std::to_string(graph_info_.num_of_pld_end_); | ||||
| auto end_op_desc = MakeShared<OpDesc>(end_name, END); | |||||
| auto end_op_desc = MakeShared<OpDesc>(end_graph->GetName() + "_" + end_name, END); | |||||
| if (end_op_desc == nullptr) { | if (end_op_desc == nullptr) { | ||||
| GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr."); | GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr."); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -318,15 +316,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||||
| bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); | bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); | ||||
| if (is_need_update_desc) { | if (is_need_update_desc) { | ||||
| if (UpdateEndOpDesc(src_node, output_index, end_op_desc) != SUCCESS) { | if (UpdateEndOpDesc(src_node, output_index, end_op_desc) != SUCCESS) { | ||||
| GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d, engine name is %s", output_index, | |||||
| engine_end_name.c_str()); | |||||
| GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d", output_index); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } else { | } else { | ||||
| GeTensorDesc input_desc; | GeTensorDesc input_desc; | ||||
| if (end_op_desc->AddInputDesc(input_desc) != SUCCESS) { | if (end_op_desc->AddInputDesc(input_desc) != SUCCESS) { | ||||
| GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d, engine name is %s", output_index, | |||||
| engine_end_name.c_str()); | |||||
| GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d", output_index); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| @@ -346,11 +342,11 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||||
| } | } | ||||
| /// For fe, op id has been set in AddNode, | /// For fe, op id has been set in AddNode, | ||||
| /// we can take op id of srcNode as the mark of parentId now | /// we can take op id of srcNode as the mark of parentId now | ||||
| auto const &src_node_opdesc = src_node->GetOpDesc(); | |||||
| const auto &src_node_opdesc = src_node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(src_node_opdesc); | GE_CHECK_NOTNULL(src_node_opdesc); | ||||
| int64_t node_id = src_node_opdesc->GetId(); | int64_t node_id = src_node_opdesc->GetId(); | ||||
| const string pld_name = kPlaceHolderType + std::to_string(graph_info_.num_of_pld_end_); | const string pld_name = kPlaceHolderType + std::to_string(graph_info_.num_of_pld_end_); | ||||
| auto pld_op_desc = MakeShared<OpDesc>(pld_name, PLACEHOLDER); | |||||
| auto pld_op_desc = MakeShared<OpDesc>(pld_graph->GetName() + "_" + pld_name, PLACEHOLDER); | |||||
| if (pld_op_desc == nullptr) { | if (pld_op_desc == nullptr) { | ||||
| GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr."); | GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr."); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -370,15 +366,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||||
| is_need_update_desc = (input_index >= 0) && (graph_info_.mode_ == kPartitioning); | is_need_update_desc = (input_index >= 0) && (graph_info_.mode_ == kPartitioning); | ||||
| if (is_need_update_desc) { | if (is_need_update_desc) { | ||||
| if (UpdatePldOpDesc(dst_node, input_index, pld_op_desc) != SUCCESS) { | if (UpdatePldOpDesc(dst_node, input_index, pld_op_desc) != SUCCESS) { | ||||
| GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d, engine name is %s", input_index, | |||||
| engine_pld_name.c_str()); | |||||
| GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d", input_index); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } else { | } else { | ||||
| GeTensorDesc output_desc; | GeTensorDesc output_desc; | ||||
| if (pld_op_desc->AddOutputDesc(output_desc) != SUCCESS) { | if (pld_op_desc->AddOutputDesc(output_desc) != SUCCESS) { | ||||
| GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, input index %d, engine name is %s", input_index, | |||||
| engine_pld_name.c_str()); | |||||
| GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, input index %d", input_index); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| } | } | ||||
| @@ -399,8 +393,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node; | graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node; | ||||
| graph_info_.pld_2_end_[new_pld_node] = new_end_node; | |||||
| graph_info_.end_2_pld_[new_end_node] = new_pld_node; | graph_info_.end_2_pld_[new_end_node] = new_pld_node; | ||||
| graph_info_.pld_2_end_[new_pld_node] = new_end_node; | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -591,7 +585,8 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vector<ge::SubGraphInfoPtr | |||||
| sgi->SetOutputContext(graph_info_.output_name_); | sgi->SetOutputContext(graph_info_.output_name_); | ||||
| AddEndPldInformationToSubGraphInfo(sgi); | AddEndPldInformationToSubGraphInfo(sgi); | ||||
| GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", | GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", | ||||
| engine_name.c_str(), sub_graph->GetName().c_str(), sgi->GetStreamLabel().c_str()); | |||||
| engine_name.c_str(), sub_graph->GetName().c_str(), | |||||
| sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str()); | |||||
| output_subgraphs.push_back(sgi); | output_subgraphs.push_back(sgi); | ||||
| } | } | ||||
| } | } | ||||
| @@ -896,8 +891,8 @@ Status ge::GraphPartitioner::AddPlaceHolderEnd(const AnchorPtr &out_anchor, cons | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| // nodes in original graph | // nodes in original graph | ||||
| auto src_node = out_anchor->GetOwnerNode(); | |||||
| auto dst_node = in_anchor->GetOwnerNode(); | |||||
| const auto &src_node = out_anchor->GetOwnerNode(); | |||||
| const auto &dst_node = in_anchor->GetOwnerNode(); | |||||
| if ((src_node == nullptr) || (dst_node == nullptr)) { | if ((src_node == nullptr) || (dst_node == nullptr)) { | ||||
| GELOGE(GE_GRAPH_PARAM_NULLPTR, "src_node or dst_node is null."); | GELOGE(GE_GRAPH_PARAM_NULLPTR, "src_node or dst_node is null."); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -323,7 +323,7 @@ Status AicpuConstantFoldingPass::LaunchSingleOpRunTask(const NodePtr &node, cons | |||||
| aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0; | aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0; | ||||
| aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0; | aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0; | ||||
| aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; | aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; | ||||
| aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0; | |||||
| aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; | |||||
| std::string task_info; | std::string task_info; | ||||
| Status ret = kernel_info->GenSingleOpRunTask(node, aicpu_task, task_info); | Status ret = kernel_info->GenSingleOpRunTask(node, aicpu_task, task_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -378,7 +378,7 @@ Status AicpuConstantFoldingPass::LaunchMemCopyTask(const vector<uint64_t> &data_ | |||||
| aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0; | aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0; | ||||
| aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0; | aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0; | ||||
| aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; | aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; | ||||
| aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0; | |||||
| aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; | |||||
| std::string task_info; | std::string task_info; | ||||
| Status ret = kernel_info->GenMemCopyTask(data_infos.size(), aicpu_task, task_info); | Status ret = kernel_info->GenMemCopyTask(data_infos.size(), aicpu_task, task_info); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -172,9 +172,12 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { | |||||
| if (!session_graph_id.empty()) { | if (!session_graph_id.empty()) { | ||||
| (void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | (void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | ||||
| } | } | ||||
| // Only flush subgraph name | |||||
| string node_name = (graph->GetParentGraph() != nullptr) | |||||
| ? (graph->GetName() + "_" + op_desc->GetName() + session_graph_id) | |||||
| : (op_desc->GetName() + session_graph_id); | |||||
| string name = op_desc->GetName() + session_graph_id; | |||||
| op_desc->SetName(name); | |||||
| op_desc->SetName(node_name); | |||||
| GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str()); | GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str()); | ||||
| // To avoid same name between graphs, set session graph id to this node | // To avoid same name between graphs, set session graph id to this node | ||||
| NodePtr clean_addr_node = graph->AddNodeFront(op_desc); | NodePtr clean_addr_node = graph->AddNodeFront(op_desc); | ||||