From: @xchu42
Reviewed-by: @ji_chen
Signed-off-by:
tags/v1.2.0
@@ -18,6 +18,12 @@
 namespace ge {
 namespace hybrid {
+namespace {
+const uint32_t kEndOfSequence = 0x0704000a;
+const uint32_t kEndOfSequenceNew = 507005;
+const int32_t kModelAbortNormal = 0x0704000e;
+const int32_t kModelAbortNormalNew = 507024;
+}  // namespace
 void GraphExecutionContext::SetErrorCode(Status error_code) {
   std::lock_guard<std::mutex> lk(mu);
   this->status = error_code;
@@ -27,5 +33,26 @@ Status GraphExecutionContext::GetStatus() const {
   std::lock_guard<std::mutex> lk(mu);
   return this->status;
 }
+
+Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) {
+  auto rt_ret = rtStreamSynchronize(rt_stream);
+  if (rt_ret == RT_ERROR_NONE) {
+    return SUCCESS;
+  }
+
+  if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) {
+    GELOGI("Got end of sequence");
+    is_eos_ = true;
+    return END_OF_SEQUENCE;
+  }
+
+  if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) {
+    GELOGI("The model with multiple datasets aborts normally");
+    return SUCCESS;
+  }
+
+  GELOGE(RT_FAILED, "Failed to invoke rtStreamSynchronize, ret = %d", rt_ret);
+  return RT_FAILED;
+}
 }  // namespace hybrid
 }  // namespace ge
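Note: there are two constants per condition because the runtime reports the same event under a legacy hex code and a newer decimal code. Below is a minimal stand-alone C++ sketch of the mapping `Synchronize` performs, using plain integers; only the four constants are taken from the diff, and the `RT_ERROR_NONE == 0` value is an assumption for illustration.

```cpp
#include <cstdint>

enum class SyncOutcome { kSuccess, kEndOfSequence, kRtFailed };

SyncOutcome MapRuntimeReturn(std::int64_t rt_ret) {
  constexpr std::int64_t kRtErrorNone = 0;             // assumed value of RT_ERROR_NONE
  constexpr std::int64_t kEndOfSequence = 0x0704000a;  // legacy runtime code
  constexpr std::int64_t kEndOfSequenceNew = 507005;   // new-style runtime code
  constexpr std::int64_t kModelAbortNormal = 0x0704000e;
  constexpr std::int64_t kModelAbortNormalNew = 507024;

  if (rt_ret == kRtErrorNone) {
    return SyncOutcome::kSuccess;
  }
  if (rt_ret == kEndOfSequence || rt_ret == kEndOfSequenceNew) {
    return SyncOutcome::kEndOfSequence;  // caller should stop feeding this stream
  }
  if (rt_ret == kModelAbortNormal || rt_ret == kModelAbortNormalNew) {
    return SyncOutcome::kSuccess;  // multi-dataset model aborting normally
  }
  return SyncOutcome::kRtFailed;  // any other code is a genuine failure
}
```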
@@ -31,11 +31,26 @@
 #include "hybrid/executor/rt_callback_manager.h"
 #include "hybrid/model/hybrid_model.h"
+
+// If expr is not SUCCESS, print the log and return the same value
+#define HYBRID_CHK_STATUS_RET(expr, ...)      \
+  do {                                        \
+    const ge::Status _status = (expr);        \
+    if (_status != ge::SUCCESS) {             \
+      if (_status == ge::END_OF_SEQUENCE) {   \
+        GELOGD("Got end of sequence");        \
+      } else {                                \
+        GELOGE(_status, __VA_ARGS__);         \
+      }                                       \
+      return _status;                         \
+    }                                         \
+  } while (0)
+
 namespace ge {
 namespace hybrid {
 struct GraphExecutionContext {
   void SetErrorCode(Status error_code);
   Status GetStatus() const;
+  Status Synchronize(rtStream_t rt_stream);
   uint64_t session_id = 0;
   const HybridModel *model = nullptr;
@@ -49,6 +64,7 @@ struct GraphExecutionContext {
   DumpProperties dump_properties;
   bool trace_enabled = false;
   bool dump_enabled = false;
+  std::atomic_bool is_eos_;
   long profiling_level = 0;
   long iteration = 0;
   Status status = SUCCESS;
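Note: `HYBRID_CHK_STATUS_RET` added above differs from the stock `GE_CHK_STATUS_RET` only in how it logs: `END_OF_SEQUENCE` is reported at debug level instead of as an error, but is still propagated to the caller. A self-contained C++ sketch of that control flow, with stand-in `Status` values and `printf` in place of the GELOG macros:

```cpp
#include <cstdio>

using Status = int;
constexpr Status SUCCESS = 0;
constexpr Status END_OF_SEQUENCE = 1;  // stand-in value, for illustration only

// Mirrors HYBRID_CHK_STATUS_RET: EOS is logged quietly, everything else loudly;
// both are returned to the caller unchanged.
#define HYBRID_CHK_STATUS_RET_DEMO(expr, msg)        \
  do {                                               \
    const Status _status = (expr);                   \
    if (_status != SUCCESS) {                        \
      if (_status == END_OF_SEQUENCE) {              \
        std::printf("DEBUG: got end of sequence\n"); \
      } else {                                       \
        std::printf("ERROR: %s\n", (msg));           \
      }                                              \
      return _status;                                \
    }                                                \
  } while (0)

Status RunStep(Status inner) {
  HYBRID_CHK_STATUS_RET_DEMO(inner, "step failed");  // EOS propagates quietly
  return SUCCESS;
}

int main() { return RunStep(END_OF_SEQUENCE) == END_OF_SEQUENCE ? 0 : 1; }
```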
@@ -24,7 +24,7 @@
 namespace ge {
 namespace hybrid {
 namespace {
-int kDataOutputIndex = 0;
+const int kDataOutputIndex = 0;
 }
 HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model)
     : model_(model), run_flag_(false) {
@@ -157,9 +157,10 @@ Status HybridModelAsyncExecutor::HandleResult(Status exec_ret,
                                               OutputData *output_data) {
   GELOGD("Start to handle result. model id = %u, data index = %u, execution ret = %u", model_id_, data_id, exec_ret);
   std::vector<ge::OutputTensorInfo> output_tensor_info_list;
-  if (exec_ret == END_OF_SEQUENCE) {
-    GELOGW("End of sequence, model id = %u", model_id_);
-    return OnComputeDone(data_id, END_OF_SEQUENCE, output_tensor_info_list);
+  if (args.is_eos) {
+    GELOGI("End of sequence, model id = %u", model_id_);
+    GE_CHK_STATUS_RET_NOLOG(OnComputeDone(data_id, END_OF_SEQUENCE, output_tensor_info_list));
+    return SUCCESS;
   }
   if (exec_ret != SUCCESS) {
@@ -50,15 +50,18 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
   auto ret = ExecuteGraphInternal(executor, args);
   Cleanup();
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End");
-  GE_CHK_STATUS_RET(ret, "Failed to execute model");
   GELOGD("Model executed successfully.");
   if (context_.profiler != nullptr) {
     context_.profiler->Dump(std::cout);
     context_.profiler->Reset();
   }
   context_.iteration += 1;
+  if (ret == END_OF_SEQUENCE) {
+    args.is_eos = true;
+  } else {
+    GE_CHK_STATUS_RET(ret, "Failed to execute model");
+  }
   return SUCCESS;
 }
@@ -68,13 +71,13 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
   GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_));
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End");
-  GE_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call.");
+  HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");
-  GE_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
+  HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
-  GE_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");
+  HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End");
   return SUCCESS;
 }
@@ -94,6 +97,7 @@ Status HybridModelExecutor::InitExecutionContext() {
   context_.stream = stream_;
   context_.model = model_;
+  context_.is_eos_ = false;
   context_.session_id = ::ge::GetContext().SessionId();
   context_.ge_context = &GetThreadLocalContext();
   GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id);
@@ -31,6 +31,7 @@ class HybridModelExecutor {
     std::vector<ConstGeTensorDescPtr> input_desc;
     std::vector<TensorValue> outputs;
     std::vector<ConstGeTensorDescPtr> output_desc;
+    bool is_eos = false;
   };

   HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream);
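Note: the new `is_eos` flag changes the calling convention: `Execute()` now returns `SUCCESS` when the dataset runs out and reports the condition through the flag, so the caller can stop feeding batches without treating it as a failure. A sketch of that feed loop in stand-alone C++ with stand-in types (the three-batch dataset is invented for illustration):

```cpp
#include <cstdio>
#include <vector>

struct ExecuteArgsDemo {
  std::vector<int> inputs;   // stand-in for the TensorValue vectors
  std::vector<int> outputs;
  bool is_eos = false;
};

// Pretend executor: the dataset has three batches, then EOS. Note that EOS is
// reported through the flag while the return code stays 0 (SUCCESS).
int ExecuteDemo(ExecuteArgsDemo &args, int step) {
  args.is_eos = (step >= 3);
  return 0;
}

int main() {
  ExecuteArgsDemo args;
  for (int step = 0;; ++step) {
    if (ExecuteDemo(args, step) != 0) {
      return 1;  // a real failure
    }
    if (args.is_eos) {
      std::printf("end of sequence after %d steps\n", step);
      break;  // clean stop, not an error
    }
  }
  return 0;
}
```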
@@ -98,6 +98,11 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
       break;
     }
+    if (context.is_eos_) {
+      GELOGD("[%s] Await pending shape cancelled due to end of sequence", node_item.NodeName().c_str());
+      return END_OF_SEQUENCE;
+    }
+
     if (context.GetStatus() != SUCCESS) {
       GELOGE(FAILED, "[%s] Await pending shape cancelled", node_item.NodeName().c_str());
       break;
@@ -114,7 +119,8 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
     auto idx = p.first;
     auto &future = p.second;
     RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
-    auto src_tensor_desc = future.GetTensorDesc();
+    GeTensorDescPtr src_tensor_desc;
+    GE_CHK_STATUS_RET_NOLOG(future.GetTensorDesc(src_tensor_desc));
     GE_CHECK_NOTNULL(src_tensor_desc);
     RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);
@@ -156,10 +162,11 @@ Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const {
                            node_item_->NodeName().c_str(),
                            "[AwaitNodeDone] [%s] Start",
                            src_node->GetName().c_str());
-    if (!subgraph_context_->Await(src_node)) {
-      GELOGE(INTERNAL_ERROR, "[%s] Await node [%s] failed.", GetName().c_str(), src_node->GetName().c_str());
-      return INTERNAL_ERROR;
-    }
+    HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node),
+                          "[%s] Await node [%s] failed.",
+                          GetName().c_str(),
+                          src_node->GetName().c_str());
     RECORD_EXECUTION_EVENT(&context,
                            node_item_->NodeName().c_str(),
@@ -183,24 +190,18 @@ Status NodeState::WaitForPrepareDone() {
 Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
   GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
-  if (!subgraph_context_->Await(src_node_)) {
-    GELOGE(INTERNAL_ERROR, "cancelled");
-    return INTERNAL_ERROR;
-  }
+  HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled");
   shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->MutableShape();
   ori_shape = src_node_->GetOpDesc()->MutableOutputDesc(src_index_)->GetOriginShape();
   GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
   return SUCCESS;
 }

-GeTensorDescPtr ShapeFuture::GetTensorDesc() {
+Status ShapeFuture::GetTensorDesc(GeTensorDescPtr &tensor_desc) {
   GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
-  if (!subgraph_context_->Await(src_node_)) {
-    GELOGE(INTERNAL_ERROR, "cancelled");
-    return nullptr;
-  }
-  return src_node_->GetOpDesc()->MutableOutputDesc(src_index_);
+  HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node_), "cancelled");
+  tensor_desc = src_node_->GetOpDesc()->MutableOutputDesc(src_index_);
+  return SUCCESS;
 }
 }  // namespace hybrid
 }  // namespace ge
@@ -35,7 +35,7 @@ class ShapeFuture {
   ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
   ~ShapeFuture() = default;
   Status Get(GeShape &ori_shape, GeShape &shape);
-  GeTensorDescPtr GetTensorDesc();
+  Status GetTensorDesc(GeTensorDescPtr &tensor_desc);

  private:
   NodePtr src_node_;
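Note: returning `Status` with an out-parameter, instead of a bare `GeTensorDescPtr`, lets `GetTensorDesc` propagate `END_OF_SEQUENCE` through `HYBRID_CHK_STATUS_RET`, where the old API could only hand back `nullptr` and lose the reason. A stand-in C++ sketch of the pattern:

```cpp
#include <memory>

using Status = int;
constexpr Status SUCCESS = 0;
constexpr Status END_OF_SEQUENCE = 1;  // stand-in values

struct TensorDescDemo {};
using TensorDescPtrDemo = std::shared_ptr<TensorDescDemo>;

// New-style API: the reason for not producing a descriptor survives.
Status GetTensorDescDemo(bool eos, TensorDescPtrDemo &out) {
  if (eos) {
    return END_OF_SEQUENCE;  // old API could only return nullptr here
  }
  out = std::make_shared<TensorDescDemo>();
  return SUCCESS;
}
```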
@@ -17,11 +17,12 @@
 #include "subgraph_context.h"
 #include "common/debug/log.h"
+#include "hybrid/executor/hybrid_model_executor.h"

 namespace ge {
 namespace hybrid {
-SubgraphContext::SubgraphContext(const GraphItem *graph_item) : graph_item_(graph_item) {
+SubgraphContext::SubgraphContext(const GraphItem *graph_item, const GraphExecutionContext *execution_context)
+    : graph_item_(graph_item), execution_context_(execution_context) {
 }

 Status SubgraphContext::Init() {
@@ -111,12 +112,22 @@ Status SubgraphContext::GetOutputs(std::vector<TensorValue> &outputs) {
   return SUCCESS;
 }

-bool SubgraphContext::Await(const NodePtr &node) {
-  return node_done_manager_.Await(node);
+Status SubgraphContext::Await(const NodePtr &node) {
+  if (node_done_manager_.Await(node)) {
+    return SUCCESS;
+  }
+
+  if (execution_context_->is_eos_) {
+    return END_OF_SEQUENCE;
+  }
+
+  return FAILED;
 }

 void SubgraphContext::OnError(Status error) {
-  GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str());
+  if (error != END_OF_SEQUENCE) {
+    GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str());
+  }
   node_done_manager_.Destroy();
 }
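Note: `Await` widens from `bool` to `Status` so a cancelled wait can be classified: if the execution context has already observed end of sequence, the cancellation is reported as `END_OF_SEQUENCE` rather than `FAILED`. The decision in isolation, as a C++ sketch with stand-ins:

```cpp
#include <atomic>

using Status = int;
constexpr Status SUCCESS = 0, END_OF_SEQUENCE = 1, FAILED = 3;  // stand-ins

// node_done flags whether the underlying NodeDoneManager wait returned true.
Status AwaitDemo(bool node_done, const std::atomic_bool &is_eos) {
  if (node_done) {
    return SUCCESS;
  }
  // The wait was cancelled: classify it using the execution context.
  return is_eos.load() ? END_OF_SEQUENCE : FAILED;
}
```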
@@ -20,6 +20,7 @@
 #include <vector>
 #include "hybrid/common/tensor_value.h"
+#include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/executor/node_state.h"
 #include "hybrid/executor/node_done_manager.h"
 #include "hybrid/model/graph_item.h"
@@ -29,7 +30,7 @@ namespace ge {
 namespace hybrid {
 class SubgraphContext {
  public:
-  explicit SubgraphContext(const GraphItem *graph_item);
+  explicit SubgraphContext(const GraphItem *graph_item, const GraphExecutionContext *execution_context);
   ~SubgraphContext() = default;

   Status Init();
@@ -43,12 +44,13 @@ class SubgraphContext {
   Status GetInput(int index, TensorValue &tensor);
   Status GetOutputs(std::vector<TensorValue> &outputs);
-  bool Await(const NodePtr &node);
+  Status Await(const NodePtr &node);
   void NodeDone(const NodePtr &node);

  private:
   friend class TaskContext;
   const GraphItem *graph_item_;
+  const GraphExecutionContext *execution_context_;
   std::mutex mu_;
   std::vector<TensorValue> all_inputs_;
   std::vector<TensorValue> all_outputs_;
@@ -40,7 +40,7 @@ SubgraphExecutor::~SubgraphExecutor() {
 Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                               const std::vector<ConstGeTensorDescPtr> &input_desc) {
-  subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_));
+  subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_, context_));
   GE_CHECK_NOTNULL(subgraph_context_);
   GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str());
@@ -139,7 +139,7 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
     return ExecuteAsyncForKnownShape(inputs);
   }

-  GE_CHK_STATUS_RET(ScheduleTasks(), "[%s] Failed to execute tasks.", graph_item_->GetName().c_str());
+  HYBRID_CHK_STATUS_RET(ScheduleTasks(), "[%s] Failed to execute tasks.", graph_item_->GetName().c_str());
   GELOGD("[%s] Done executing subgraph successfully.", graph_item_->GetName().c_str());
   return SUCCESS;
 }
@@ -163,10 +163,10 @@ Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue
   known_shape_task_context_ = TaskContext::Create(*node_item, context_, subgraph_context_.get());
   GE_CHECK_NOTNULL(known_shape_task_context_);

-  GE_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, known_shape_task_context_, *context_),
-                    "[%s] Failed to execute node [%s] for known subgraph.",
-                    graph_item_->GetName().c_str(),
-                    known_shape_task_context_->GetNodeName());
+  HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, known_shape_task_context_, *context_),
+                        "[%s] Failed to execute node [%s] for known subgraph.",
+                        graph_item_->GetName().c_str(),
+                        known_shape_task_context_->GetNodeName());
   GELOGD("[%s] Done execute non-dynamic subgraph successfully.", graph_item_->GetName().c_str());
   return SUCCESS;
@@ -211,35 +211,34 @@ Status SubgraphExecutor::PrepareNodes() {
     GE_CHECK_NOTNULL(node_state);
     auto p_node_state = node_state.get();

-    if (node_item.node_type == NETOUTPUT) {
-      // Wait for all inputs become valid
-      // after PrepareNodes returned. all output tensors and shapes are valid
-      GE_CHK_STATUS_RET_NOLOG(p_node_state->GetShapeInferenceState().AwaitShapesReady(*context_));
-      GE_CHK_STATUS_RET_NOLOG(p_node_state->AwaitInputTensors(*context_));
-      continue;
-    }
-
-    // only do shape inference and compilation for nodes with dynamic shapes.
-    if (node_item.is_dynamic) {
-      auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status {
-        GetContext().SetSessionId(context_->session_id);
-        GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state));
-        return PrepareForExecution(context_, *p_node_state);
-      });
-
-      p_node_state->SetPrepareFuture(std::move(prepare_future));
-    } else {
-      GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", node_item.NodeName().c_str());
-      if (node_item.kernel_task == nullptr) {
-        GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
-        GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_),
-                          "[%s] Failed to create task.", p_node_state->GetName().c_str());
-      } else {
-        node_state->SetKernelTask(node_item.kernel_task);
-      }
-    }
+    if (node_item.node_type != NETOUTPUT) {
+      // only do shape inference and compilation for nodes with dynamic shapes.
+      if (node_item.is_dynamic) {
+        auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status {
+          GetContext().SetSessionId(context_->session_id);
+          GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state));
+          return PrepareForExecution(context_, *p_node_state);
+        });
+
+        p_node_state->SetPrepareFuture(std::move(prepare_future));
+      } else {
+        GELOGD("[%s] Skipping shape inference and compilation for node with static shape.",
+               node_item.NodeName().c_str());
+        if (node_item.kernel_task == nullptr) {
+          GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
+          GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_),
+                            "[%s] Failed to create task.", p_node_state->GetName().c_str());
+        } else {
+          node_state->SetKernelTask(node_item.kernel_task);
+        }
+      }
+    }

     if (!ready_queue_.Push(p_node_state)) {
+      if (context_->is_eos_) {
+        GELOGD("Got end of sequence");
+        return SUCCESS;
+      }
       GELOGE(INTERNAL_ERROR, "[%s] Error occurs while launching tasks. quit from preparing nodes.",
              graph_item_->GetName().c_str());
       return INTERNAL_ERROR;
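Note: `ready_queue_.Push` plausibly fails only after the queue has been stopped (for example by `ScheduleTasks` on error), so checking `context_->is_eos_` lets the prepare loop tell a deliberate EOS shutdown apart from a real failure. A stand-in C++ sketch, assuming a `Stop()`-able blocking queue whose `Push` returns false once stopped:

```cpp
#include <atomic>

using Status = int;
constexpr Status SUCCESS = 0, INTERNAL_ERROR = 2;  // stand-ins

struct StoppableQueueDemo {
  std::atomic_bool stopped{false};
  bool Push(int /*item*/) { return !stopped.load(); }  // false once stopped
};

Status PrepareLoopStep(StoppableQueueDemo &queue, const std::atomic_bool &is_eos) {
  if (!queue.Push(0)) {
    return is_eos.load() ? SUCCESS : INTERNAL_ERROR;  // EOS: clean early exit
  }
  return SUCCESS;
}
```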
@@ -253,10 +252,10 @@ Status SubgraphExecutor::PrepareNodes() {
 Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) {
   const auto &node_item = *node_state.GetNodeItem();
-  GE_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
-                    "[%s] Failed to InferShape.", node_state.GetName().c_str());
-  GE_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item),
-                    "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
+  HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
+                        "[%s] Failed to InferShape.", node_state.GetName().c_str());
+  HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item),
+                        "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
   return SUCCESS;
 }
@@ -284,6 +283,15 @@ Status SubgraphExecutor::LaunchTasks() {
       return SUCCESS;
     }

+    if (node_state->GetType() == NETOUTPUT) {
+      // Wait for all inputs become valid
+      // after PrepareNodes returned. all output tensors and shapes are valid
+      GE_CHK_STATUS_RET_NOLOG(node_state->GetShapeInferenceState().AwaitShapesReady(*context_));
+      GE_CHK_STATUS_RET_NOLOG(node_state->AwaitInputTensors(*context_));
+      GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
+      continue;
+    }
+
     GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone());

     GELOGD("[%s] Start to execute.", node_state->GetName().c_str());
@@ -291,10 +299,9 @@ Status SubgraphExecutor::LaunchTasks() {
     GE_CHECK_NOTNULL(task_context);
     task_context->SetForceInferShape(force_infer_shape_);
     auto shared_task_context = std::shared_ptr<TaskContext>(task_context.release());
-    GE_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
-                      "[%s] Execute node failed.",
-                      node_state->GetName().c_str());
+    HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
+                          "[%s] Execute node failed.",
+                          node_state->GetName().c_str());
     GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
   }
 }
@@ -311,7 +318,6 @@ Status SubgraphExecutor::ScheduleTasks() {
   GELOGD("[%s] Start to execute subgraph.", graph_item_->GetName().c_str());
   auto ret = LaunchTasks();
   if (ret != SUCCESS) {
-    GELOGE(ret, "[%s] Failed to execute subgraph.", graph_item_->GetName().c_str());
     subgraph_context_->OnError(ret);
     context_->SetErrorCode(ret);
     ready_queue_.Stop();
@@ -350,7 +356,7 @@ Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs, std::vector<Co
 Status SubgraphExecutor::Synchronize() {
   GELOGD("[%s] Synchronize start.", graph_item_->GetName().c_str());
-  GE_CHK_RT_RET(rtStreamSynchronize(context_->stream));
+  GE_CHK_STATUS_RET_NOLOG(context_->Synchronize(context_->stream));
   GELOGD("[%s] Done synchronizing successfully.", graph_item_->GetName().c_str());
   return SUCCESS;
 }
@@ -408,9 +408,9 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
   // Wait for dependent nodes(DEPEND_COMPUTE), so that the input tensors are valid.
   RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[AwaitDependents] Start");
-  GE_CHK_STATUS_RET(node_state.AwaitInputTensors(context),
-                    "[%s] Failed to wait for dependent nodes.",
-                    node_state.GetName().c_str());
+  HYBRID_CHK_STATUS_RET(node_state.AwaitInputTensors(context),
+                        "[%s] Failed to wait for dependent nodes.",
+                        node_state.GetName().c_str());

   const auto &node_item = *node_state.GetNodeItem();
   auto executor = node_item.node_executor;
@@ -440,9 +440,9 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
     });
   }

   RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] Start");
-  GE_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback),
-                    "[%s] Failed to execute task",
-                    node_state.GetName().c_str());
+  HYBRID_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback),
+                        "[%s] Failed to execute task",
+                        node_state.GetName().c_str());
   RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] End");

   GELOGD("[%s] Done task launch successfully.", node_state.GetName().c_str());
@@ -99,11 +99,7 @@ Status ShapeInferenceEngine::AwaitDependentNodes(NodeState &node_state) {
                                  node_item.NodeName().c_str(),
                                  "[AwaitNodeDone] [%s] Start",
                                  src_node->GetName().c_str());
-    if (!subgraph_context_->Await(src_node)) {
-      GELOGE(INTERNAL_ERROR, "[%s] Await node failed.", src_node->GetName().c_str());
-      return INTERNAL_ERROR;
-    }
+    HYBRID_CHK_STATUS_RET(subgraph_context_->Await(src_node), "[%s] Await node failed.", src_node->GetName().c_str());
     RECORD_SHAPE_INFERENCE_EVENT(execution_context_,
                                  node_item.NodeName().c_str(),
                                  "[AwaitNodeDone] [%s] End",
@@ -19,6 +19,7 @@
 #include "common/formats/formats.h"
 #include "aicpu/common/aicpu_task_struct.h"
 #include "graph/load/new_model_manager/model_manager.h"
+#include "graph/utils/node_utils.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/model/hybrid_model.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
@@ -187,7 +188,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AicpuNodeTaskBaseExecuteAsync] Start");
   GELOGD("Node[%s] execute async start. unknown_type=%d.", node_name_.c_str(), unknown_type_);
-  GE_CHK_STATUS_RET(LaunchTask(context));
+  HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str());

   uint32_t task_id = 0;
   uint32_t stream_id = 0;
@@ -346,7 +347,11 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) {
   GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
                          &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL),
                          RT_MEMCPY_HOST_TO_DEVICE));
+  auto node_type = NodeUtils::GetNodeType(node_item_->node);
+  if (node_type.find(GETNEXT) != string::npos) {
+    GELOGD("[%s] Is GetNext, set need sync to true, node type = %s", node_name_.c_str(), node_type.c_str());
+    need_sync_ = true;
+  }
   GELOGI("Node[%s] init end.", node_name_.c_str());
   return SUCCESS;
 }
@@ -616,6 +621,10 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) {
   GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream()));
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End");
   GELOGD("Node[%s] launch end.", node_name_.c_str());
+  if (need_sync_) {
+    GELOGD("[%s] Task needs sync", node_name_.c_str());
+    GE_CHK_STATUS_RET_NOLOG(context.Synchronize());
+  }
   return SUCCESS;
 }
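Note: GetNext is the node that actually consumes the dataset, so its task synchronizes the stream right after launch; `END_OF_SEQUENCE` then surfaces at the producing node instead of only at the final graph-level synchronize. A stand-in C++ sketch of the launch-then-maybe-sync shape (the sync callback is simplified to a function pointer):

```cpp
using Status = int;
constexpr Status SUCCESS = 0;
constexpr Status END_OF_SEQUENCE = 1;  // stand-in values

struct TaskDemo {
  bool need_sync = false;  // set at Init when the node type contains "GetNext"
};

// synchronize stands in for TaskContext::Synchronize().
Status LaunchDemo(const TaskDemo &task, Status (*synchronize)()) {
  // ... the kernel launch itself would happen here ...
  if (task.need_sync) {
    return synchronize();  // may report END_OF_SEQUENCE immediately
  }
  return SUCCESS;
}
```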
@@ -144,6 +144,7 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase {
   std::unique_ptr<TensorBuffer> copy_input_data_size_dev_;
   std::unique_ptr<TensorBuffer> copy_input_src_dev_;
   std::unique_ptr<TensorBuffer> copy_input_dst_dev_;
+  bool need_sync_ = false;
 };

 class AicpuNodeTask : public AicpuNodeTaskBase {
@@ -20,6 +20,7 @@
 #include "graph/utils/node_utils.h"
 #include "init/gelib.h"
 #include "graph/utils/tensor_utils.h"
+#include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/model/hybrid_model.h"
 #include "graph/debug/ge_attr_define.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
@@ -44,9 +45,9 @@ Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
 }

 Status NodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, const std::function<void()> &callback) const {
-  GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback),
-                    "Failed to execute task. node = %s",
-                    context.GetNodeItem().NodeName().c_str());
+  HYBRID_CHK_STATUS_RET(task.ExecuteAsync(context, callback),
+                        "Failed to execute task. node = %s",
+                        context.GetNodeItem().NodeName().c_str());
   return SUCCESS;
 }
@@ -494,5 +494,9 @@ const DumpProperties &TaskContext::GetDumpProperties() const {
 bool TaskContext::NeedCallback() {
   return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0;
 }
+
+Status TaskContext::Synchronize() {
+  return execution_context_->Synchronize(GetStream());
+}
 }  // namespace hybrid
 }  // namespace ge
@@ -102,6 +102,8 @@ class TaskContext {
   uint32_t GetStreamId() const;
   void SetStreamId(uint32_t stream_id);

+  Status Synchronize();
+
   bool IsForceInferShape() const;
   void SetForceInferShape(bool force_infer_shape);
   void *handle_ = nullptr;