From: @li-lei0106 Reviewed-by: @ji_chen Signed-off-by: @ji_chen tags/v1.2.0
@@ -165,7 +165,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||||
return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
} | } | ||||
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | ||||
GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||||
GELOGD("ModelPartitionTable num:%u, ModelFileHeader length:%zu, ModelPartitionTable length:%zu", | |||||
partition_table->num, sizeof(ModelFileHeader), mem_offset); | partition_table->num, sizeof(ModelFileHeader), mem_offset); | ||||
if (model_data_size <= mem_offset) { | if (model_data_size <= mem_offset) { | ||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | ||||
@@ -207,7 +207,8 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m | |||||
"ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | "ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | ||||
index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); | index, partition_table->num, sizeof(ModelFileHeader), partition_table_size); | ||||
if (model_data_size <= cur_offset) { | if (model_data_size <= cur_offset) { | ||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, | |||||
"invalid model data, partition_table->num:%u, model data size %u", | |||||
partition_table->num, model_data_size); | partition_table->num, model_data_size); | ||||
return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | ||||
} | } | ||||
@@ -454,7 +454,8 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||||
if (all_data_dims[i] < 0) { | if (all_data_dims[i] < 0) { | ||||
cur_dynamic_dims.push_back(dynamic_dims[i]); | cur_dynamic_dims.push_back(dynamic_dims[i]); | ||||
} else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | ||||
GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld", | |||||
GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, | |||||
"Static dims should be same, index: %zu value: %lu should be %ld", | |||||
i, dynamic_dims[i], all_data_dims[i]); | i, dynamic_dims[i], all_data_dims[i]); | ||||
return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID; | ||||
} | } | ||||
@@ -157,8 +157,8 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||||
} | } | ||||
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, | ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, | ||||
int64_t dim_index, int64_t &output_mem_size, | |||||
int64_t &batch_dim_num, int64_t &out_size) { | |||||
int64_t dim_index, int64_t &output_mem_size, | |||||
int64_t &batch_dim_num, int64_t &out_size) { | |||||
graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | ||||
if (graph_status != GRAPH_SUCCESS) { | if (graph_status != GRAPH_SUCCESS) { | ||||
GELOGE(FAILED, "Opdesc GetSize failed!"); | GELOGE(FAILED, "Opdesc GetSize failed!"); | ||||
@@ -1042,7 +1042,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
} | } | ||||
GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", | GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", | ||||
is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index, | is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index, | ||||
profiling_point.end_index.size() ); | |||||
profiling_point.end_index.size()); | |||||
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | ||||
if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) { | if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) { | ||||
@@ -94,7 +94,7 @@ const int32_t kModelAbortNormal = 0x0704000e; | |||||
const int32_t kModelAbortNormalNew = 507024; | const int32_t kModelAbortNormalNew = 507024; | ||||
inline bool IsDataOp(const std::string &node_type) { | inline bool IsDataOp(const std::string &node_type) { | ||||
return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE; | |||||
return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); | |||||
} | } | ||||
inline bool IsTbeTask(const OpDescPtr &op_desc) { | inline bool IsTbeTask(const OpDescPtr &op_desc) { | ||||
@@ -187,12 +187,12 @@ DavinciModel::~DavinciModel() { | |||||
UnbindTaskSinkStream(); | UnbindTaskSinkStream(); | ||||
for (size_t i = 0; i < label_list_.size(); ++i) { | for (size_t i = 0; i < label_list_.size(); ++i) { | ||||
if (label_list_[i] != nullptr) { | if (label_list_[i] != nullptr) { | ||||
GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i); | |||||
GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index:%zu", i); | |||||
} | } | ||||
} | } | ||||
for (size_t i = 0; i < stream_list_.size(); ++i) { | for (size_t i = 0; i < stream_list_.size(); ++i) { | ||||
GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i); | |||||
GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index:%zu", i); | |||||
} | } | ||||
for (size_t i = 0; i < event_list_.size(); ++i) { | for (size_t i = 0; i < event_list_.size(); ++i) { | ||||
@@ -337,7 +337,7 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh | |||||
Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | ||||
if (is_feature_map_mem_has_inited_) { | if (is_feature_map_mem_has_inited_) { | ||||
GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once."); | |||||
GELOGE(PARAM_INVALID, "call InitFeatureMapMem more than once"); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
is_feature_map_mem_has_inited_ = true; | is_feature_map_mem_has_inited_ = true; | ||||
@@ -381,7 +381,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
is_inner_p2p_mem_base_ = true; | is_inner_p2p_mem_base_ = true; | ||||
} | } | ||||
GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | |||||
GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed"); | |||||
runtime_param_.mem_base = mem_base_; | runtime_param_.mem_base = mem_base_; | ||||
runtime_param_.weight_base = weights_mem_base_; | runtime_param_.weight_base = weights_mem_base_; | ||||
runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; | runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; | ||||
@@ -391,7 +391,7 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
Status DavinciModel::InitVariableMem() { | Status DavinciModel::InitVariableMem() { | ||||
// malloc variable memory base | // malloc variable memory base | ||||
var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); | var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); | ||||
if (TotalVarMemSize() && var_mem_base_ == nullptr) { | |||||
if (TotalVarMemSize() && (var_mem_base_ == nullptr)) { | |||||
Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize()); | Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize()); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Malloc variable memory failed."); | GELOGE(ret, "Malloc variable memory failed."); | ||||
@@ -500,25 +500,25 @@ Status DavinciModel::DoTaskSink() { | |||||
} | } | ||||
GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_)); | GE_CHK_RT_RET(rtGetAicpuDeploy(&deploy_type_)); | ||||
GELOGI("do task_sink. AiCpu deploy type is: %x.", deploy_type_); | |||||
GELOGI("do task_sink. AiCpu deploy type is: %x", deploy_type_); | |||||
GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); | GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed."); | ||||
if (known_node_) { | if (known_node_) { | ||||
GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed."); | |||||
GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed"); | |||||
} | } | ||||
GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed."); | |||||
GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed"); | |||||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); | |||||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed"); | |||||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed."); | |||||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed"); | |||||
GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); | |||||
GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed"); | |||||
GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed."); | |||||
GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed"); | |||||
GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); | |||||
GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed"); | |||||
GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); | GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_)); | ||||
@@ -3332,7 +3332,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp | |||||
/// | /// | ||||
Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | ||||
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) { | const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) { | ||||
string input_or_output = "input"; | |||||
string input_or_output; | |||||
is_input ? input_or_output = "input" : input_or_output = "output"; | is_input ? input_or_output = "input" : input_or_output = "output"; | ||||
if (blobs.size() != data_info.size()) { | if (blobs.size() != data_info.size()) { | ||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", | ||||
@@ -3342,7 +3342,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||||
for (const auto &data : data_info) { | for (const auto &data : data_info) { | ||||
if (data.first >= blobs.size()) { // check data index. | if (data.first >= blobs.size()) { // check data index. | ||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||||
"Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||||
input_or_output.c_str(), data.first, blobs.size()); | input_or_output.c_str(), data.first, blobs.size()); | ||||
return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
} | } | ||||
@@ -4133,10 +4134,10 @@ Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op | |||||
int64_t data_input_size; | int64_t data_input_size; | ||||
(void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); | (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); | ||||
GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", | GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", | ||||
index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||||
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||||
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||||
index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||||
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), | |||||
formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); | |||||
} | } | ||||
} | } | ||||
@@ -2254,9 +2254,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | ||||
new (std::nothrow) | new (std::nothrow) | ||||
LinkGenMaskNodesPass(options_.stream_max_parallel_num))); | LinkGenMaskNodesPass(options_.stream_max_parallel_num))); | ||||
GE_CHK_STATUS_RET( | |||||
after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", | |||||
new (std::nothrow) HcclContinuousMemcpyPass)); | |||||
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::HcclContinuousMemcpyPass", | |||||
new (std::nothrow) HcclContinuousMemcpyPass)); | |||||
GE_TIMESTAMP_START(after_merge_passes); | GE_TIMESTAMP_START(after_merge_passes); | ||||
auto ret = after_merge_passes.Run(compute_graph); | auto ret = after_merge_passes.Run(compute_graph); | ||||
@@ -26,6 +26,7 @@ | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
#include "graph/manager/host_mem_allocator.h" | |||||
#include "graph/node.h" | #include "graph/node.h" | ||||
#include "runtime/mem.h" | #include "runtime/mem.h" | ||||
@@ -139,7 +140,6 @@ class MemoryAllocator { | |||||
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | ||||
class CachingAllocator; | class CachingAllocator; | ||||
class RdmaPoolAllocator; | class RdmaPoolAllocator; | ||||
class HostMemAllocator; | |||||
class MemManager { | class MemManager { | ||||
public: | public: | ||||
MemManager(); | MemManager(); | ||||
@@ -24,9 +24,9 @@ namespace { | |||||
constexpr uint32_t kValidInputNodeOutputNum = 1; | constexpr uint32_t kValidInputNodeOutputNum = 1; | ||||
constexpr int32_t kAssignRefInputIndex = 0; | constexpr int32_t kAssignRefInputIndex = 0; | ||||
constexpr int32_t kAssignValueInputIndex = 1; | constexpr int32_t kAssignValueInputIndex = 1; | ||||
static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
ge::CONSTANT, ge::CONSTANTOP, | |||||
ge::VARIABLE, ge::VARIABLEV2 }; | |||||
const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
ge::CONSTANT, ge::CONSTANTOP, | |||||
ge::VARIABLE, ge::VARIABLEV2 }; | |||||
} | } | ||||
Status AssignRemovePass::Run(NodePtr &node) { | Status AssignRemovePass::Run(NodePtr &node) { | ||||
@@ -50,13 +50,11 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||||
return FoldingPass::RunOpKernel(node, inputs, outputs); | return FoldingPass::RunOpKernel(node, inputs, outputs); | ||||
} | } | ||||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
&ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||||
const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | |||||
return statistic_of_ge_constant_folding_; | return statistic_of_ge_constant_folding_; | ||||
} | } | ||||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
&ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||||
const map<string, pair<uint64_t, uint64_t>> &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | |||||
return statistic_of_op_constant_folding_; | return statistic_of_op_constant_folding_; | ||||
} | } | ||||
@@ -140,7 +140,8 @@ bool HcclContinuousMemcpyPass::IsDataNode(const std::string& node_type) { | |||||
/// @param [in] ge::OutDataAnchorPtr in_node | /// @param [in] ge::OutDataAnchorPtr in_node | ||||
/// @return ge::NodePtr | /// @return ge::NodePtr | ||||
/// | /// | ||||
NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { | |||||
NodePtr HcclContinuousMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, | |||||
const OutDataAnchorPtr &out_data_anchor) { | |||||
GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | ||||
NodePtr pre_node = out_data_anchor->GetOwnerNode(); | NodePtr pre_node = out_data_anchor->GetOwnerNode(); | ||||
OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | ||||
@@ -205,8 +206,9 @@ std::string HcclContinuousMemcpyPass::CheckDuplicateName(const std::string &node | |||||
/// @param [in] InDataAnchorPtr hccl_in_anchor | /// @param [in] InDataAnchorPtr hccl_in_anchor | ||||
/// @return status | /// @return status | ||||
/// | /// | ||||
Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||||
const InDataAnchorPtr &hccl_in_anchor) { | |||||
Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, | |||||
const OutDataAnchorPtr &src_out_anchor, | |||||
const InDataAnchorPtr &hccl_in_anchor) { | |||||
GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | ||||
GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); | GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); | ||||
@@ -235,8 +237,9 @@ Status HcclContinuousMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &gra | |||||
/// @param [in] InDataAnchorPtr hccl_in_anchor | /// @param [in] InDataAnchorPtr hccl_in_anchor | ||||
/// @return status | /// @return status | ||||
/// | /// | ||||
Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, | |||||
const InDataAnchorPtr &hccl_in_anchor) { | |||||
Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, | |||||
const OutDataAnchorPtr &src_out_anchor, | |||||
const InDataAnchorPtr &hccl_in_anchor) { | |||||
GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), | GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), | ||||
hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | ||||
NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); | NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); | ||||
@@ -274,8 +277,8 @@ Status HcclContinuousMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr | |||||
/// @return status | /// @return status | ||||
/// | /// | ||||
Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, | Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, | ||||
const OutDataAnchorPtr &var_out_anchor, | |||||
const InDataAnchorPtr &hccl_in_anchor) { | |||||
const OutDataAnchorPtr &var_out_anchor, | |||||
const InDataAnchorPtr &hccl_in_anchor) { | |||||
if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { | if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { | ||||
GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -354,8 +357,9 @@ Status HcclContinuousMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeG | |||||
/// @param [in] ge::OutDataAnchorPtr variable node out anchor | /// @param [in] ge::OutDataAnchorPtr variable node out anchor | ||||
/// @return ge::NodePtr | /// @return ge::NodePtr | ||||
/// | /// | ||||
NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { | |||||
GE_CHECK_NOTNULL_EXEC(graph , return nullptr); | |||||
NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, | |||||
const OutDataAnchorPtr &out_data_anchor) { | |||||
GE_CHECK_NOTNULL_EXEC(graph, return nullptr); | |||||
NodePtr pre_node = out_data_anchor->GetOwnerNode(); | NodePtr pre_node = out_data_anchor->GetOwnerNode(); | ||||
OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | OpDescPtr pre_op_desc = pre_node->GetOpDesc(); | ||||
if (pre_op_desc == nullptr) { | if (pre_op_desc == nullptr) { | ||||
@@ -23,9 +23,9 @@ namespace ge { | |||||
namespace { | namespace { | ||||
constexpr uint32_t kInplaceSupportOutputIndex = 0; | constexpr uint32_t kInplaceSupportOutputIndex = 0; | ||||
constexpr uint32_t kInplaceSupportOutputNum = 1; | constexpr uint32_t kInplaceSupportOutputNum = 1; | ||||
static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
ge::CONSTANT, ge::CONSTANTOP, | |||||
ge::VARIABLE, ge::VARIABLEV2 }; | |||||
const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
ge::CONSTANT, ge::CONSTANTOP, | |||||
ge::VARIABLE, ge::VARIABLEV2 }; | |||||
} | } | ||||
Status InplaceSupportCheckPass::Run(NodePtr &node) { | Status InplaceSupportCheckPass::Run(NodePtr &node) { | ||||
GELOGD("InplaceSupportCheckPass running"); | GELOGD("InplaceSupportCheckPass running"); | ||||
@@ -43,7 +43,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) { | |||||
GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()]; | int key = kToBeDeleteOp.find(node->GetType()) == kToBeDeleteOp.end() ? kOpNoDelete : kToBeDeleteOp[node->GetType()]; | ||||
switch(key) { | |||||
switch (key) { | |||||
case kReshapeType: { | case kReshapeType: { | ||||
bool is_shape_unknown = false; | bool is_shape_unknown = false; | ||||
if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { | if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { | ||||
@@ -385,7 +385,7 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &gra | |||||
// Break Move and follow, Link Data and follow. | // Break Move and follow, Link Data and follow. | ||||
const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex); | const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex); | ||||
const auto in_anchors =out_anchor->GetPeerInDataAnchors(); | |||||
const auto in_anchors = out_anchor->GetPeerInDataAnchors(); | |||||
for (const auto in_anchor : in_anchors) { | for (const auto in_anchor : in_anchors) { | ||||
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); | GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); | ||||
GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); | GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); | ||||
@@ -991,7 +991,6 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, | |||||
Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option, | Status GetDynamicInputShapeRange(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option, | ||||
vector<vector<std::pair<int64_t, int64_t>>> &range_vec) { | vector<vector<std::pair<int64_t, int64_t>>> &range_vec) { | ||||
// check both mode and shape_range option are all enabled | // check both mode and shape_range option are all enabled | ||||
auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); | ||||
bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute"); | bool enable_dynamic_execute_mode = (mode_iter != graph_option.end()) && (mode_iter->second == "dynamic_execute"); | ||||
if (!enable_dynamic_execute_mode) { | if (!enable_dynamic_execute_mode) { | ||||
@@ -1272,9 +1271,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option) { | |||||
Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, | |||||
const std::map<string, string> &graph_option) { | |||||
// Get shape range of input in dynamic_execute mode | // Get shape range of input in dynamic_execute mode | ||||
vector<vector<std::pair<int64_t,int64_t>>> dynamic_shape_range_vec; | |||||
vector<vector<std::pair<int64_t, int64_t>>> dynamic_shape_range_vec; | |||||
auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); | auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); | ||||
GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); | GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); | ||||
compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); | compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); | ||||
@@ -2012,7 +2012,8 @@ Status GraphPrepare::ProcessNetOutput() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input,const std::map<string,string> &graph_option) { | |||||
Status GraphPrepare::CheckAndUpdateInput(const std::vector<GeTensor> &user_input, | |||||
const std::map<string, string> &graph_option) { | |||||
compute_graph_->SetInputSize(user_input.size()); | compute_graph_->SetInputSize(user_input.size()); | ||||
if (user_input.empty()) { | if (user_input.empty()) { | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -63,8 +63,8 @@ class GraphPrepare { | |||||
Status CheckRefOp(); | Status CheckRefOp(); | ||||
Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); | Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); | ||||
Status AdjustDataOpOutput(const NodePtr &node); | Status AdjustDataOpOutput(const NodePtr &node); | ||||
Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||||
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string,string> &graph_option); | |||||
Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option); | |||||
Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<string, string> &graph_option); | |||||
Status CheckConstOp(); | Status CheckConstOp(); | ||||
Status VerifyConstOp(const NodePtr &node); | Status VerifyConstOp(const NodePtr &node); | ||||
Status CheckUserInput(const std::vector<GeTensor> &user_input); | Status CheckUserInput(const std::vector<GeTensor> &user_input); | ||||
@@ -105,7 +105,7 @@ GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, con | |||||
/// @return 0: true/false | /// @return 0: true/false | ||||
/// | /// | ||||
GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | ||||
const std::string &input_format); | |||||
const std::string &input_format); | |||||
} // namespace multibatch | } // namespace multibatch | ||||
} // namespace ge | } // namespace ge | ||||
@@ -62,9 +62,9 @@ struct GraphExecutionContext { | |||||
rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
rtContext_t rt_context = nullptr; | rtContext_t rt_context = nullptr; | ||||
rtContext_t rt_gen_context = nullptr; | rtContext_t rt_gen_context = nullptr; | ||||
std::unique_ptr<CallbackManager> callback_manager; | |||||
std::unique_ptr<CallbackManager> callback_manager = nullptr; | |||||
NpuMemoryAllocator *allocator = nullptr; | NpuMemoryAllocator *allocator = nullptr; | ||||
mutable std::unique_ptr<HybridProfiler> profiler; | |||||
mutable std::unique_ptr<HybridProfiler> profiler = nullptr; | |||||
DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
bool trace_enabled = false; | bool trace_enabled = false; | ||||
bool dump_enabled = false; | bool dump_enabled = false; | ||||
@@ -275,10 +275,10 @@ Status SubgraphExecutor::PrepareNodes(int group) { | |||||
Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { | Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { | ||||
GetContext().SetSessionId(context_->context_id); | GetContext().SetSessionId(context_->context_id); | ||||
HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), | HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), | ||||
"[%s] Failed to InferShape.", node_state.GetName().c_str()); | |||||
"[%s] Failed to InferShape.", node_state.GetName().c_str()); | |||||
GetContext().SetSessionId(context_->session_id); | GetContext().SetSessionId(context_->session_id); | ||||
HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), | HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), | ||||
"[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | |||||
"[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -520,7 +520,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||||
const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
const GraphExecutionContext * graph_context = GetExecutionContext(); | |||||
const GraphExecutionContext *graph_context = GetExecutionContext(); | |||||
GE_CHECK_NOTNULL(graph_context); | GE_CHECK_NOTNULL(graph_context); | ||||
const HybridModel *model = graph_context->model; | const HybridModel *model = graph_context->model; | ||||
GE_CHECK_NOTNULL(model); | GE_CHECK_NOTNULL(model); | ||||
@@ -551,7 +551,7 @@ Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream | |||||
const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
const GraphExecutionContext * graph_context = GetExecutionContext(); | |||||
const GraphExecutionContext *graph_context = GetExecutionContext(); | |||||
GE_CHECK_NOTNULL(graph_context); | GE_CHECK_NOTNULL(graph_context); | ||||
const HybridModel *model = graph_context->model; | const HybridModel *model = graph_context->model; | ||||
GE_CHECK_NOTNULL(model); | GE_CHECK_NOTNULL(model); | ||||
@@ -62,19 +62,18 @@ using std::shared_ptr; | |||||
using std::string; | using std::string; | ||||
using std::vector; | using std::vector; | ||||
// Unnamed namespace: everything declared inside has internal linkage (visible only in this translation unit).
namespace { | |||||
static bool is_dynamic_input = false; | static bool is_dynamic_input = false; | ||||
// Message text listing the mode values 0, 1, 3, 5 and 6 together with a short description of each.
const char *const kModeSupport = "only support 0(model to framework model), " | const char *const kModeSupport = "only support 0(model to framework model), " | ||||
"1(framework model to json), 3(only pre-check), " | "1(framework model to json), 3(only pre-check), " | ||||
"5(pbtxt to json), 6(display model info)"; | "5(pbtxt to json), 6(display model info)"; | ||||
// Message text listing the framework values 0 (Caffe), 3 (TensorFlow) and 5 (Onnx).
const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; | const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; | ||||
// Message texts listing the data formats named for Caffe, TF and ONNX models respectively.
static const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | |||||
static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | |||||
static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; | |||||
const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | |||||
const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | |||||
const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; | |||||
// NOTE(review): 2097152 == 2 * 1024 * 1024, so the "2G" below holds only if the value is counted
// in KB -- the unit is not visible here; confirm at the place where kMinAvailableMem is compared.
// limit available mem size 2G | // limit available mem size 2G | ||||
const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 | const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 | ||||
} // namespace | |||||
DEFINE_string(model, "", "The model file."); | DEFINE_string(model, "", "The model file."); | ||||
DEFINE_string(output, "", "The output file path&name."); | DEFINE_string(output, "", "The output file path&name."); | ||||
@@ -965,7 +965,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js | |||||
} else { | } else { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E10003", | ErrorManager::GetInstance().ATCReportErrMessage("E10003", | ||||
{"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); | {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); | ||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, | |||||
"ParseModelContent failed because of invalid om file. Please check --om param."); | |||||
} | } | ||||
if (model.model_data != nullptr) { | if (model.model_data != nullptr) { | ||||
@@ -385,7 +385,8 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||||
num_inputs_, | num_inputs_, | ||||
num_outputs_, | num_outputs_, | ||||
unknown_type_)); | unknown_type_)); | ||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); | |||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, | |||||
"Malloc aicpu_ext_handle mem failed!"); | |||||
Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); | Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -423,7 +424,7 @@ Status AiCpuBaseTask::SetInputConst() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
rtStream_t stream) { | rtStream_t stream) { | ||||
GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | ||||
@@ -55,7 +55,8 @@ typedef void *OpTensor_t; | |||||
/// @return 0 for success / others for fail | /// @return 0 for success / others for fail | ||||
/// | /// | ||||
GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, | GE_FUNC_VISIBILITY extern Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, | ||||
const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, const char *om_file); | |||||
const OpTensor_t *out_tensor, int out_num, const OpAttr_t attr, | |||||
const char *om_file); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -52,7 +52,8 @@ GE_FUNC_VISIBILITY Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_ME | |||||
/// \param var_info [in] host variable addr infos. | /// \param var_info [in] host variable addr infos. | ||||
/// \param mem_type [in] memory type for rdma pool. | /// \param mem_type [in] memory type for rdma pool. | ||||
/// \return Status result of function | /// \return Status result of function | ||||
GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); | |||||
GE_FUNC_VISIBILITY Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, | |||||
rtMemType_t mem_type = RT_MEMORY_HBM); | |||||
/// | /// | ||||
/// \param tensor_info [in] description for tensor stored shared memory. | /// \param tensor_info [in] description for tensor stored shared memory. | ||||