diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc
index 5585feec..e9881224 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -94,7 +94,7 @@ Status HybridModelAsyncExecutor::Init() {
   executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
   GE_CHECK_NOTNULL(executor_);
   GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
-  GE_CHK_STATUS_RET(InitInputTensors(), "Failed to init input tensors");
+  GE_CHK_STATUS_RET(InitInputDesc(), "Failed to init input tensors");
 
   return SUCCESS;
 }
@@ -199,50 +199,66 @@ Status HybridModelAsyncExecutor::SyncVarData() {
 }
 
 Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
-  args.inputs.resize(input_tensors_.size());
+  if (current_data.blobs.size() < input_tensor_desc_.size()) {
+    GELOGE(PARAM_INVALID, "Blob size mismatches, expect at least %zu, but got %zu",
+           input_tensor_desc_.size(), current_data.blobs.size());
+    return PARAM_INVALID;
+  }
+
+  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
+  GE_CHECK_NOTNULL(allocator);
   args.input_desc.resize(input_tensor_desc_.size());
   const std::vector<DataBuffer> &blobs = current_data.blobs;
-  for (const auto &it : input_tensors_) {
-    auto input_index = it.first;
-    auto input_tensor = it.second;
-    auto data_size = input_tensor.GetSize();
-    GELOGD("To copy input data for input[%u]", input_index);
-    if (input_index >= blobs.size()) {
-      GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld",
-             blobs.size(), model_->input_nodes_.size(), input_index, data_size);
-      return FAILED;
+  for (size_t input_index = 0; input_index < input_tensor_desc_.size(); ++input_index) {
+    auto tensor_size = input_sizes_[input_index];
+    if (is_input_dynamic_[input_index]) {
+      if (input_index >= current_data.shapes.size()) {
+        GELOGE(PARAM_INVALID, "Shape index out of range, index = %zu, shape size = %zu",
+               input_index, current_data.shapes.size());
+        return PARAM_INVALID;
+      }
+      auto &tensor_desc = input_tensor_desc_[input_index];
+      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
+      args.input_desc[input_index] = tensor_desc;
+      GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
+                              "Failed to calc tensor size, index = %zu, shape = [%s]",
+                              input_index,
+                              tensor_desc->GetShape().ToString().c_str());
+      GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size);
     }
+    GE_CHECK_GE(tensor_size, 0);
+    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    GE_CHECK_NOTNULL(tensor_buffer);
+    args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release()));
+
+    GELOGD("To copy input data for input[%zu]", input_index);
     const DataBuffer &data_buf = blobs[input_index];
-    auto mem_size = static_cast<uint32_t>(data_size);
+    auto mem_size = static_cast<uint64_t>(tensor_size);
     GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
                            PARAM_INVALID,
-                           "input data size(%lu) does not match model required size(%u), ret failed.",
+                           "input data size(%lu) does not match model required size(%lu), ret failed.",
                            data_buf.length,
                            mem_size);
     GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]",
-           model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length);
-    GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(),
+           model_->root_runtime_param_.graph_id,
+           input_index,
+           args.inputs[input_index].GetData(),
+           mem_size,
+           data_buf.length);
+    GE_CHK_RT_RET(rtMemcpy(args.inputs[input_index].MutableData(),
                            mem_size,
                            data_buf.data,
                            data_buf.length,
                            RT_MEMCPY_HOST_TO_DEVICE));
-    args.inputs[input_index] = input_tensor;
-    if (is_input_dynamic_[input_index]) {
-      auto &tensor_desc = input_tensor_desc_[input_index];
-      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
-      args.input_desc[input_index] = tensor_desc;
-      GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
-    }
   }
 
   return SUCCESS;
 }
 
-Status HybridModelAsyncExecutor::InitInputTensors() {
-  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
-  GE_CHECK_NOTNULL(allocator);
+Status HybridModelAsyncExecutor::InitInputDesc() {
   int input_index = 0;
   for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
     GELOGD("Init input[%u], node = %s, is_dynamic = %d",
@@ -251,21 +267,21 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
            input_node->is_dynamic);
     auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
     GE_CHECK_NOTNULL(output_desc);
-    int64_t tensor_size = 0;
-    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
-                            "Failed to get size from %s",
-                            input_node->NodeName().c_str());
-    if (tensor_size == 0) {
-      GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
-      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
-                              "Failed to calc tensor size");
-      GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+    int64_t tensor_size = -1;
+    if (!input_node->is_dynamic) {
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
+                              "Failed to get size from %s",
+                              input_node->NodeName().c_str());
+
+      if (tensor_size == 0) {
+        GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
+        GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
+                                "Failed to calc tensor size");
+        GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+      }
     }
-    auto buffer = TensorBuffer::Create(allocator, tensor_size);
-    GE_CHECK_NOTNULL(buffer);
-    TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release()));
-    tensor.SetName("Input_" + input_node->NodeName());
-    input_tensors_.emplace(input_index, tensor);
+
+    input_sizes_.emplace(input_index, tensor_size);
     input_tensor_desc_.emplace(input_index, output_desc);
     is_input_dynamic_.push_back(input_node->is_dynamic);
     input_index += 1;
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index ad39cac5..21d2d033 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -53,7 +53,7 @@ class HybridModelAsyncExecutor {
   Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);
 
  private:
-  Status InitInputTensors();
+  Status InitInputDesc();
 
   Status RunInternal();
 
@@ -85,7 +85,7 @@ class HybridModelAsyncExecutor {
   uint64_t iterator_count_ = 0;
 
   rtStream_t stream_ = nullptr;
-  std::map<uint32_t, TensorValue> input_tensors_;
+  std::map<uint32_t, int64_t> input_sizes_;
   std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
   std::vector<bool> is_input_dynamic_;
   std::shared_ptr<ModelListener> listener_;
diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc
index 93c6c58c..171ddaf3 100644
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -61,7 +61,7 @@ Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target
   tensor_desc->SetShape(target.GetShape());
   tensor_desc->SetOriginShape(target.GetOriginShape());
   (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
-  if (--num_pending_shapes_ == 0) {
+  if (--num_pending_shapes_ <= 0) {
     ready_cv_.notify_all();
   }
 
diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc
index eb00f509..acc250ef 100644
--- a/ge/hybrid/model/node_item.cc
+++ b/ge/hybrid/model/node_item.cc
@@ -163,6 +163,25 @@ Status NodeItem::ResolveDynamicState() {
 
 Status NodeItem::ResolveStaticInputsAndOutputs() {
   for (int i = 0; i < num_inputs; ++i) {
+    // Data nodes may have an unconnected input that is fed by the framework
+    if (node_type != DATA) {
+      int origin_index = i;
+      if (has_optional_inputs) {
+        origin_index = input_desc_indices_[i];
+      }
+      auto in_data_anchor = node->GetInDataAnchor(origin_index);
+      GE_CHECK_NOTNULL(in_data_anchor);
+
+      // If no node is connected to the current input anchor, count its shape as static
+      // to avoid a dead wait in ShapeInferenceState::AwaitShapesReady
+      if (in_data_anchor->GetPeerOutAnchor() == nullptr ||
+          in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) {
+        num_static_input_shapes++;
+        is_input_shape_static_.push_back(true);
+        GELOGW("[%s] Peer node of input[%d] is empty", NodeName().c_str(), i);
+        continue;
+      }
+    }
     const auto &input_desc = MutableInputDesc(i);
     GE_CHECK_NOTNULL(input_desc);
     if (input_desc->MutableShape().IsUnknownShape()) {