@@ -86,6 +86,24 @@ class AicpuNodeTaskBase : public NodeTask {
  Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
  Status UpdateEventIdForBlockingAicpuOp();
  virtual Status CopyDataToHbm(TaskContext &context,
                               const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) = 0;
| /// | |||||
| /// read result summary and prepare copy task memory. | |||||
| /// @param context task context | |||||
| /// @param out_shape_hbm if scalar, TensorBuffer->data is null, size=0 | |||||
| /// @return SUCCESS:success other:failed | |||||
| /// | |||||
  Status ReadResultSummaryAndPrepareMemory(TaskContext &context,
                                           std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
  Status UpdateShapeByHbmBuffer(TaskContext &context,
                                const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
  Status PrepareCopyInputs(const TaskContext &context,
                           const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);

 protected:
  const NodeItem *node_item_;
  // just reference.
@@ -114,6 +132,14 @@ class AicpuNodeTaskBase : public NodeTask {
  // for blocking aicpu op
  bool is_blocking_aicpu_op_ = false;
  rtEvent_t rt_event_ = nullptr;

  std::vector<std::unique_ptr<TensorBuffer>> output_summary_;
  std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;

  std::unique_ptr<TensorBuffer> copy_input_release_flag_dev_;
  std::unique_ptr<TensorBuffer> copy_input_data_size_dev_;
  std::unique_ptr<TensorBuffer> copy_input_src_dev_;
  std::unique_ptr<TensorBuffer> copy_input_dst_dev_;
};

class AicpuTfNodeTask : public AicpuNodeTaskBase {
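Taken together, the declarations above follow a template-method split: the summary bookkeeping, copy-input preparation, and shape refresh live in AicpuNodeTaskBase, while the HBM copy stays pure virtual for AicpuTfNodeTask and AicpuNodeTask to supply. Below is a minimal, self-contained sketch of that control flow; it compiles on its own, but every name in it (including the driver UpdateOutputsAfterSummary) is illustrative, and the real GE signatures additionally take a TaskContext.

#include <memory>
#include <vector>

// Illustrative stand-ins; the real GE Status/TensorBuffer/TaskContext types differ.
using Status = int;
constexpr Status SUCCESS = 0;
constexpr Status FAILED = -1;
struct TensorBuffer { std::vector<unsigned char> data; };

class NodeTaskBaseSketch {
 public:
  virtual ~NodeTaskBaseSketch() = default;

  // Shared driver: read the per-output result summaries, let the derived class
  // perform the runtime-specific copy into HBM, then refresh the output shapes.
  Status UpdateOutputsAfterSummary() {
    std::vector<std::unique_ptr<TensorBuffer>> out_shape_hbm;
    if (ReadResultSummaryAndPrepareMemory(out_shape_hbm) != SUCCESS) { return FAILED; }
    if (PrepareCopyInputs(out_shape_hbm) != SUCCESS) { return FAILED; }
    if (CopyDataToHbm(out_shape_hbm) != SUCCESS) { return FAILED; }  // TF vs. CC kernel specific
    return UpdateShapeByHbmBuffer(out_shape_hbm);
  }

 protected:
  // Pure virtual, as in the diff: only the HBM copy differs per kernel type.
  virtual Status CopyDataToHbm(const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) = 0;

  Status ReadResultSummaryAndPrepareMemory(std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) {
    out_shape_hbm.emplace_back(new TensorBuffer());  // one shape buffer per output
    return SUCCESS;
  }
  Status PrepareCopyInputs(const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) {
    return out_shape_hbm.empty() ? FAILED : SUCCESS;
  }
  Status UpdateShapeByHbmBuffer(const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) {
    return out_shape_hbm.empty() ? FAILED : SUCCESS;
  }
};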
@@ -178,20 +178,36 @@ class AiCpuBaseTask : public OpTask {
                       rtStream_t stream);
  Status UpdateOutputShape(vector<GeTensorDesc> &output_desc);
  Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc);
  Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
                                           vector<DataBuffer> &outputs,
                                           rtStream_t stream);
  Status ReadResultSummaryAndPrepareMemory();
  // for blocking aicpu op
  Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream);
  Status UpdateEventIdForBlockingAicpuOp();
  Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
  Status PrepareCopyInputs(vector<DataBuffer> &outputs);
  Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);
  virtual Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) = 0;

 protected:
  size_t num_inputs_ = 0;
@@ -213,6 +229,16 @@ class AiCpuBaseTask : public OpTask {
  // for blocking aicpu op
  bool is_blocking_aicpu_op_ = false;
  rtEvent_t rt_event_ = nullptr;

  std::vector<void *> output_summary_;
  std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;

  void *copy_input_release_flag_dev_ = nullptr;
  void *copy_input_data_size_dev_ = nullptr;
  void *copy_input_src_dev_ = nullptr;
  void *copy_input_dst_dev_ = nullptr;

  vector<void *> out_shape_hbm_;
};

class AiCpuTask : public AiCpuBaseTask {
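The four copy_input_*_dev_ members back a follow-up memcpy task that receives, for every output, a release flag, the byte count, the source address reported by the kernel, and the caller's destination buffer. Below is a self-contained sketch of how those four host-side arrays could be assembled before being staged into the copy_input_*_dev_ buffers; the ResultSummary field names are assumed from the fwk_adpt headers, and BuildCopyInputs is a hypothetical helper rather than the actual PrepareCopyInputs implementation.

#include <cstddef>
#include <cstdint>
#include <vector>

// Assumed layout of aicpu::FWKAdapter::ResultSummary (per the fwk_adpt headers):
// device addresses and byte sizes of the shape data and raw data each output produced.
struct ResultSummary {
  uint64_t shape_data_ptr;
  uint64_t shape_data_size;
  uint64_t raw_data_ptr;
  uint64_t raw_data_size;
};

// Host-side staging for the copy task; each vector would later be copied into the
// matching copy_input_*_dev_ device buffer.
struct CopyTaskInputs {
  std::vector<uint64_t> release_flags;  // whether the kernel-side buffer may be released
  std::vector<uint64_t> data_sizes;     // bytes to copy per output
  std::vector<uint64_t> src_addrs;      // device addresses reported by the kernel
  std::vector<uint64_t> dst_addrs;      // caller-provided output buffers
};

// Hypothetical helper: assemble one entry per output from its result summary.
CopyTaskInputs BuildCopyInputs(const std::vector<ResultSummary> &summaries,
                               const std::vector<void *> &user_outputs,
                               uint64_t release_flag = 1U) {
  CopyTaskInputs inputs;
  for (std::size_t i = 0U; i < summaries.size() && i < user_outputs.size(); ++i) {
    inputs.release_flags.push_back(release_flag);
    inputs.data_sizes.push_back(summaries[i].raw_data_size);
    inputs.src_addrs.push_back(summaries[i].raw_data_ptr);
    inputs.dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(user_outputs[i])));
  }
  return inputs;
}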
@@ -394,3 +394,33 @@ TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
}
TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) {
  AiCpuCCTask task;
  rtStream_t stream = nullptr;
  task.num_inputs_ = 2;
  task.num_outputs_ = 1;
  task.input_is_const_ = {true, false};
  constexpr int total_addr = 3;
  uint32_t *addrs[total_addr] = {nullptr, nullptr, nullptr};
  task.io_addr_ = reinterpret_cast<uintptr_t *>(addrs);
  task.io_addr_num_ = total_addr;

  // Build the ext-info blob (header + int32 payload) directly in one heap buffer,
  // so the flexible infoMsg member is never written past the end of a stack object.
  const size_t ext_len = sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t);
  char *ext_mem = reinterpret_cast<char *>(malloc(ext_len));
  ASSERT_NE(ext_mem, nullptr);
  auto *aicpu_ext_info = reinterpret_cast<ge::hybrid::AicpuExtInfo *>(ext_mem);
  aicpu_ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
  aicpu_ext_info->infoLen = sizeof(int32_t);
  int32_t type = ge::DEPEND_COMPUTE;
  memcpy_s(aicpu_ext_info->infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
  std::string ext_info_str(ext_mem, ext_len);
  free(ext_mem);

  vector<DataBuffer> inputs(2, DataBuffer());
  vector<DataBuffer> outputs(1, DataBuffer());
  vector<GeTensorDesc> inputs_desc(2, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
  vector<GeTensorDesc> outputs_desc(1, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
  ASSERT_EQ(task.SetExtInfoAndType(ext_info_str, 0), SUCCESS);
  task.unknown_type_ = ge::DEPEND_COMPUTE;
  ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS);
  ASSERT_EQ(task.LaunchKernel(inputs_desc, inputs, outputs_desc, outputs, stream), SUCCESS);
}