From: @li-lei0106
Reviewed-by: @ji_chen
Signed-off-by: @ji_chen
@@ -165,7 +165,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
     return ACL_ERROR_GE_PARAM_INVALID;
   }
   size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
-  GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
+  GELOGD("ModelPartitionTable num:%u, ModelFileHeader length:%zu, ModelPartitionTable length:%zu",
          partition_table->num, sizeof(ModelFileHeader), mem_offset);
   if (model_data_size <= mem_offset) {
     GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
@@ -207,7 +207,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
            "ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
            index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
     if (model_data_size <= cur_offset) {
-      GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
+      GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID,
+             "invalid model data, partition_table->num:%u, model data size %u",
              partition_table->num, model_data_size);
       return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
     }
@@ -454,7 +454,8 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &
     if (all_data_dims[i] < 0) {
       cur_dynamic_dims.push_back(dynamic_dims[i]);
     } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) {
-      GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld",
+      GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID,
+             "Static dims should be same, index: %zu value: %lu should be %ld",
              i, dynamic_dims[i], all_data_dims[i]);
       return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
     }
@@ -157,8 +157,8 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
 }
 
 ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
-                                            int64_t dim_index, int64_t &output_mem_size,
-                                            int64_t &batch_dim_num, int64_t &out_size) {
+                                             int64_t dim_index, int64_t &output_mem_size,
+                                             int64_t &batch_dim_num, int64_t &out_size) {
   graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
   if (graph_status != GRAPH_SUCCESS) {
     GELOGE(FAILED, "Opdesc GetSize failed!");
@@ -1042,7 +1042,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
   }
   GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
          is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
-         profiling_point.end_index.size() );
+         profiling_point.end_index.size());
   bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
   if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {
@@ -94,7 +94,7 @@ const int32_t kModelAbortNormal = 0x0704000e;
 const int32_t kModelAbortNormalNew = 507024;
 
 inline bool IsDataOp(const std::string &node_type) {
-  return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE;
+  return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE);
 }
 
 inline bool IsTbeTask(const OpDescPtr &op_desc) {
@@ -187,12 +187,12 @@ DavinciModel::~DavinciModel() {
   UnbindTaskSinkStream();
   for (size_t i = 0; i < label_list_.size(); ++i) {
     if (label_list_[i] != nullptr) {
-      GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i);
+      GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index:%zu", i);
     }
   }
 
   for (size_t i = 0; i < stream_list_.size(); ++i) {
-    GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i);
+    GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index:%zu", i);
   }
 
   for (size_t i = 0; i < event_list_.size(); ++i) {
@@ -337,7 +337,7 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
 
 Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
   if (is_feature_map_mem_has_inited_) {
-    GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once.");
+    GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once");
     return PARAM_INVALID;
   }
   is_feature_map_mem_has_inited_ = true;
@@ -381,7 +381,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
     is_inner_p2p_mem_base_ = true;
   }
 
-  GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
+  GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed");
   runtime_param_.mem_base = mem_base_;
   runtime_param_.weight_base = weights_mem_base_;
   runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_;
@@ -391,7 +391,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
 Status DavinciModel::InitVariableMem() {
   // malloc variable memory base
   var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM);
-  if (TotalVarMemSize() && var_mem_base_ == nullptr) {
+  if (TotalVarMemSize() && (var_mem_base_ == nullptr)) {
     Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize());
     if (ret != SUCCESS) {
       GELOGE(ret, "Malloc variable memory failed.");
@@ -500,25 +500,25 @@ Status DavinciModel::DoTaskSink() {
   }
 
   GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_));
-  GELOGI("do task_sink. AiCpu deploy type is: %x.", deploy_type_);
+  GELOGI("do task_sink. AiCpu deploy type is: %x", deploy_type_);
 
   GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");
 
   if (known_node_) {
-    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed.");
+    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed");
   }
 
-  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
+  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");
 
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");
 
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");
 
-  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
+  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");
 
-  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
+  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");
 
-  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
+  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");
 
   GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));
@@ -3332,7 +3332,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp
 ///
 Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
                                       const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) {
-  string input_or_output = "input";
+  string input_or_output;
   is_input ? input_or_output = "input" : input_or_output = "output";
   if (blobs.size() != data_info.size()) {
     GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu",
@@ -3342,7 +3342,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &
 
   for (const auto &data : data_info) {
     if (data.first >= blobs.size()) {  // check data index.
-      GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu",
+      GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+             "Verify %s data num failed: can not find No.%u data, because user only feeds %zu",
              input_or_output.c_str(), data.first, blobs.size());
       return ACL_ERROR_GE_PARAM_INVALID;
     }
@@ -4133,10 +4134,10 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op
     int64_t data_input_size;
     (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size);
     GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s",
-        index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
-        TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
-        TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
-        formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
+           index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
+           TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
+           TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
+           formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
   }
 }
@@ -2254,9 +2254,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
   GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass",
                                                new (std::nothrow)
                                                    LinkGenMaskNodesPass(options_.stream_max_parallel_num)));
-  GE_CHK_STATUS_RET(
-      after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass",
-                                 new (std::nothrow) HcclContinuousMemcpyPass));
+  GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass",
+                                               new (std::nothrow) HcclContinuousMemcpyPass));
 
   GE_TIMESTAMP_START(after_merge_passes);
   auto ret = after_merge_passes.Run(compute_graph);
@@ -26,6 +26,7 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/ge_inner_error_codes.h"
+#include "graph/manager/host_mem_allocator.h"
 #include "graph/node.h"
 #include "runtime/mem.h"
@@ -139,7 +140,6 @@ class MemoryAllocator {
 using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;
 
 class CachingAllocator;
 class RdmaPoolAllocator;
-class HostMemAllocator;
 class MemManager {
  public:
   MemManager();
@@ -24,9 +24,9 @@ namespace {
 constexpr uint32_t kValidInputNodeOutputNum = 1;
 constexpr int32_t kAssignRefInputIndex = 0;
 constexpr int32_t kAssignValueInputIndex = 1;
-static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
-                                                        ge::CONSTANT, ge::CONSTANTOP,
-                                                        ge::VARIABLE, ge::VARIABLEV2 };
+const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
+                                                 ge::CONSTANT, ge::CONSTANTOP,
+                                                 ge::VARIABLE, ge::VARIABLEV2 };
 }
 
 Status AssignRemovePass::Run(NodePtr &node) {
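This hunk, like the matching one in the inplace-support-check pass further down, drops `static` from file-local `const std::set` definitions. In C++, a non-extern const variable at namespace scope already has internal linkage, and these sets additionally sit inside an unnamed namespace, so `static` is doubly redundant. A minimal illustration with hypothetical names:

```cpp
#include <set>
#include <string>

namespace {
// Internal linkage is already guaranteed twice over here: by the unnamed
// namespace and by const at namespace scope. 'static' would add nothing.
const std::set<std::string> kExampleTypes = {"Data", "Const", "Variable"};
}  // namespace
```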
@@ -50,13 +50,11 @@ Status RunOpKernelWithCheck(NodePtr &node,
   return FoldingPass::RunOpKernel(node, inputs, outputs);
 }
 
-const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
-    &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
+const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const {
   return statistic_of_ge_constant_folding_;
 }
 
-const std::map<std::string, std::pair<std::uint64_t, uint64_t>>
-    &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
+const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const {
   return statistic_of_op_constant_folding_;
 }
@@ -140,7 +140,8 @@ bool HcclContinuousMemcpyPass::IsDataNode(const std::string& node_type) {
 /// @param [in] ge::OutDataAnchorPtr in_node
 /// @return ge::NodePtr
 ///
-NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) {
+NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph,
+                                                     const OutDataAnchorPtr &out_data_anchor) {
   GE_CHECK_NOTNULL_EXEC(graph, return nullptr);
   NodePtr pre_node = out_data_anchor->GetOwnerNode();
   OpDescPtr pre_op_desc = pre_node->GetOpDesc();
@@ -205,8 +206,9 @@ std::string HcclContinuousMemcpyPass::CheckDuplicateName(const std::string &node
 /// @param [in] InDataAnchorPtr hccl_in_anchor
 /// @return status
 ///
-Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
-                                                      const InDataAnchorPtr &hccl_in_anchor) {
+Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph,
+                                                      const OutDataAnchorPtr &src_out_anchor,
+                                                      const InDataAnchorPtr &hccl_in_anchor) {
   GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode());
   GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode());
@@ -235,8 +237,9 @@ Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &gra
 /// @param [in] InDataAnchorPtr hccl_in_anchor
 /// @return status
 ///
-Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
-                                                          const InDataAnchorPtr &hccl_in_anchor) {
+Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph,
+                                                          const OutDataAnchorPtr &src_out_anchor,
+                                                          const InDataAnchorPtr &hccl_in_anchor) {
   GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(),
          hccl_in_anchor->GetOwnerNode()->GetName().c_str());
   NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor);
@@ -274,8 +277,8 @@ Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr
 /// @return status
 ///
 Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph,
-                                                          const OutDataAnchorPtr &var_out_anchor,
-                                                          const InDataAnchorPtr &hccl_in_anchor) {
+                                                                  const OutDataAnchorPtr &var_out_anchor,
+                                                                  const InDataAnchorPtr &hccl_in_anchor) {
   if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) {
     GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str());
     return SUCCESS;
@@ -354,8 +357,9 @@ Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeG
 /// @param [in] ge::OutDataAnchorPtr variable node out anchor
 /// @return ge::NodePtr
 ///
-NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) {
-  GE_CHECK_NOTNULL_EXEC(graph , return nullptr);
+NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph,
+                                                   const OutDataAnchorPtr &out_data_anchor) {
+  GE_CHECK_NOTNULL_EXEC(graph, return nullptr);
   NodePtr pre_node = out_data_anchor->GetOwnerNode();
   OpDescPtr pre_op_desc = pre_node->GetOpDesc();
   if (pre_op_desc == nullptr) {
@@ -23,9 +23,9 @@ namespace ge {
 namespace {
 constexpr uint32_t kInplaceSupportOutputIndex = 0;
 constexpr uint32_t kInplaceSupportOutputNum = 1;
-static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
-                                                     ge::CONSTANT, ge::CONSTANTOP,
-                                                     ge::VARIABLE, ge::VARIABLEV2 };
+const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
+                                              ge::CONSTANT, ge::CONSTANTOP,
+                                              ge::VARIABLE, ge::VARIABLEV2 };
 }
 
 Status InplaceSupportCheckPass::Run(NodePtr &node) {
   GELOGD("InplaceSupportCheckPass running");
@@ -43,7 +43,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) {
   GE_CHECK_NOTNULL(node);
   GE_CHECK_NOTNULL(node->GetOpDesc());
   int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()];
-  switch(key) {
+  switch (key) {
     case kReshapeType: {
       bool is_shape_unknown = false;
       if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) {
@@ -385,7 +385,7 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra
   // Break Move and follow, Link Data and follow.
   const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex);
-  const auto in_anchors =out_anchor->GetPeerInDataAnchors();
+  const auto in_anchors = out_anchor->GetPeerInDataAnchors();
   for (const auto in_anchor : in_anchors) {
     GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
     GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
@@ -991,7 +991,6 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range,
 Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option,
                                  vector<vector<std::pair<int64_t, int64_t>>> &range_vec) {
   // check both mode and shape_range option are all enabled
-
   auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE);
   bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute");
   if (!enable_dynamic_execute_mode) {
@@ -1272,9 +1271,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) {
   return SUCCESS;
 }
 
-Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option) {
+Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input,
+                                 const std::map<string, string> &graph_option) {
   // Get shape range of input in dynamic_execute mode
-  vector<vector<std::pair<int64_t,int64_t>>> dynamic_shape_range_vec;
+  vector<vector<std::pair<int64_t, int64_t>>> dynamic_shape_range_vec;
   auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec);
   GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode.");
   compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format));
@@ -2012,7 +2012,8 @@ Status GraphPrepare::ProcessNetOutput() {
   return SUCCESS;
 }
 
-Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,const std::map<string,string> &graph_option) {
+Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,
+                                         const std::map<string, string> &graph_option) {
   compute_graph_->SetInputSize(user_input.size());
   if (user_input.empty()) {
     return SUCCESS;
@@ -63,8 +63,8 @@ class GraphPrepare {
   Status CheckRefOp();
   Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode);
   Status AdjustDataOpOutput(const NodePtr &node);
-  Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option);
-  Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option);
+  Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
+  Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option);
   Status CheckConstOp();
   Status VerifyConstOp(const NodePtr &node);
   Status CheckUserInput(const std::vector<GeTensor> &user_input);
@@ -105,7 +105,7 @@ GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, con
 /// @return 0: true/false
 ///
 GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name,
-                                       const std::string &input_format);
+                                                   const std::string &input_format);
 }  // namespace multibatch
 }  // namespace ge
@@ -62,9 +62,9 @@ struct GraphExecutionContext {
   rtStream_t stream = nullptr;
   rtContext_t rt_context = nullptr;
   rtContext_t rt_gen_context = nullptr;
-  std::unique_ptr<CallbackManager> callback_manager;
+  std::unique_ptr<CallbackManager> callback_manager = nullptr;
   NpuMemoryAllocator *allocator = nullptr;
-  mutable std::unique_ptr<HybridProfiler> profiler;
+  mutable std::unique_ptr<HybridProfiler> profiler = nullptr;
   DumpProperties dump_properties;
   bool trace_enabled = false;
   bool dump_enabled = false;
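The GraphExecutionContext hunk adds explicit `= nullptr` default member initializers to the two std::unique_ptr members. A default-constructed unique_ptr is already null, so behavior is unchanged; the initializers just spell out the empty state and match the neighboring raw-pointer members. A small sketch of the equivalence, using a hypothetical struct:

```cpp
#include <memory>

struct Example {
  std::unique_ptr<int> implicit_ptr;            // default-constructed: already null
  std::unique_ptr<int> explicit_ptr = nullptr;  // identical state, intent spelled out
  int *raw_ptr = nullptr;                       // raw pointers DO need the initializer
};
```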
@@ -275,10 +275,10 @@ Status SubgraphExecutor::PrepareNodes(int group) {
 Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
   GetContext().SetSessionId(context_->context_id);
   HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
-                      "[%s] Failed to InferShape.", node_state.GetName().c_str());
+                        "[%s] Failed to InferShape.", node_state.GetName().c_str());
   GetContext().SetSessionId(context_->session_id);
   HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
-                      "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
+                        "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
   return SUCCESS;
 }
@@ -520,7 +520,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream
   const NodeItem &node_item = GetNodeItem();
   auto op_desc = node_item.GetOpDesc();
   GE_CHECK_NOTNULL(op_desc);
-  const GraphExecutionContext * graph_context = GetExecutionContext();
+  const GraphExecutionContext *graph_context = GetExecutionContext();
   GE_CHECK_NOTNULL(graph_context);
   const HybridModel *model = graph_context->model;
   GE_CHECK_NOTNULL(model);
@@ -551,7 +551,7 @@ Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream
   const NodeItem &node_item = GetNodeItem();
   auto op_desc = node_item.GetOpDesc();
   GE_CHECK_NOTNULL(op_desc);
-  const GraphExecutionContext * graph_context = GetExecutionContext();
+  const GraphExecutionContext *graph_context = GetExecutionContext();
   GE_CHECK_NOTNULL(graph_context);
   const HybridModel *model = graph_context->model;
   GE_CHECK_NOTNULL(model);
@@ -62,19 +62,18 @@ using std::shared_ptr;
 using std::string;
 using std::vector;
 
+namespace {
 static bool is_dynamic_input = false;
 const char *const kModeSupport = "only support 0(model to framework model), "
                                  "1(framework model to json), 3(only pre-check), "
                                  "5(pbtxt to json), 6(display model info)";
 const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)";
-static const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model";
-static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model";
-static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model";
+const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model";
+const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model";
+const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model";
 // limit available mem size 2G
 const long kMinAvailableMem = 2097152;  // 2 * 1024 * 1024
+}  // namespace
 
 DEFINE_string(model, "", "The model file.");
 DEFINE_string(output, "", "The output file path&name.");
@@ -965,7 +965,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js
   } else {
     ErrorManager::GetInstance().ATCReportErrMessage("E10003",
         {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"});
-    GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param.");
+    GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+           "ParseModelContent failed because of invalid om file. Please check --om param.");
   }
 
   if (model.model_data != nullptr) {
@@ -385,7 +385,8 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint
                                                             num_inputs_,
                                                             num_outputs_,
                                                             unknown_type_));
-  GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!");
+  GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION,
+                         "Malloc aicpu_ext_handle mem failed!");
 
   Status ret = aicpu_ext_handle_->Parse(kernel_ext_info);
   if (ret != SUCCESS) {
@@ -423,7 +424,7 @@ Status AiCpuBaseTask::SetInputConst() {
   return SUCCESS;
 }
 
-Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, 
+Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
                                     std::vector<GeTensorDesc> &output_desc,
                                     rtStream_t stream) {
   GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_);
@@ -55,7 +55,8 @@ typedef void *OpTensor_t;
 /// @return 0 for success / others for fail
 ///
 GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num,
-    const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file);
+                                                    const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr,
+                                                    const char *om_file);
 
 ///
 /// @ingroup ge
@@ -52,7 +52,8 @@ GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_ME
 /// \param var_info [in] host variable addr infos.
 /// \param mem_type [in] memory type for rdma pool.
 /// \return Status result of function
-GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM);
+GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info,
+                                             rtMemType_t mem_type = RT_MEMORY_HBM);
 
 ///
 /// \param tensor_info [in] description for tensor stored shared memory.
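For readers outside the GE codebase: nearly every hunk above normalizes call sites of the status-check and logging macros (GE_CHK_STATUS_RET, GE_CHK_RT_RET, GE_CHK_BOOL_RET_STATUS, GE_LOGW_IF, GELOGD/GELOGI/GELOGE) by wrapping long calls to the line-length limit, aligning continuation arguments, and tidying message strings. The real definitions live in GE's debug headers; the sketch below only illustrates the check-and-return pattern these macros implement, with assumed, not actual, macro bodies:

```cpp
#include <cstdint>
#include <cstdio>

using Status = std::uint32_t;
constexpr Status SUCCESS = 0;

// Hypothetical stand-in for GELOGE: printf-style logging with an error code.
// Format strings therefore use %u/%zu/%lu matched to argument types, exactly
// as in the hunks above. (##__VA_ARGS__ is the GNU-style zero-argument form.)
#define GELOGE(code, fmt, ...) \
  std::fprintf(stderr, "[ERROR] code=%u " fmt "\n", (code), ##__VA_ARGS__)

// Assumed shape of GE_CHK_STATUS_RET: evaluate a Status expression once; on
// failure, log the message and propagate the failing status to the caller.
#define GE_CHK_STATUS_RET(expr, fmt, ...)    \
  do {                                       \
    const Status _status = (expr);           \
    if (_status != SUCCESS) {                \
      GELOGE(_status, fmt, ##__VA_ARGS__);   \
      return _status;                        \
    }                                        \
  } while (false)
```

Because the message argument is passed straight through as a format string, the message cleanups above (dropping trailing periods, removing stray spaces around ':' in "index: %zu") change only the emitted log text, never control flow.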