@@ -86,6 +86,24 @@ class AicpuNodeTaskBase : public NodeTask {
  Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
  Status UpdateEventIdForBlockingAicpuOp();
  virtual Status CopyDataToHbm(TaskContext &context,
                               const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) = 0;
  ///
  /// Read the result summary of each output and prepare memory for the copy task.
  /// @param context task context
  /// @param out_shape_hbm device buffers that will hold the output shape data;
  ///        for a scalar output, TensorBuffer->data is null and its size is 0
  /// @return SUCCESS on success, any other value on failure
  /// (a usage sketch of this summary/copy flow follows this class)
  ///
  Status ReadResultSummaryAndPrepareMemory(TaskContext &context,
                                           std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
  Status UpdateShapeByHbmBuffer(TaskContext &context,
                                const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
  Status PrepareCopyInputs(const TaskContext &context,
                           const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm);
 protected:
  const NodeItem *node_item_;
  // just reference.
@@ -114,6 +132,14 @@ class AicpuNodeTaskBase : public NodeTask {
  // for blocking aicpu op
  bool is_blocking_aicpu_op_ = false;
  rtEvent_t rt_event_ = nullptr;
  std::vector<std::unique_ptr<TensorBuffer>> output_summary_;
  std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;
  std::unique_ptr<TensorBuffer> copy_input_release_flag_dev_;
  std::unique_ptr<TensorBuffer> copy_input_data_size_dev_;
  std::unique_ptr<TensorBuffer> copy_input_src_dev_;
  std::unique_ptr<TensorBuffer> copy_input_dst_dev_;
};
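For orientation, here is a hedged sketch of how the hooks declared above are expected to chain for a DEPEND_COMPUTE output flow: read the per-output summaries back, launch the copy task, then refresh the output shapes. Only ReadResultSummaryAndPrepareMemory, CopyDataToHbm and UpdateShapeByHbmBuffer come from the header; the driver method below and its error messages are illustrative assumptions, not code from this change.

// Hedged sketch only: assumes a driver method like this is declared on AicpuNodeTaskBase.
Status AicpuNodeTaskBase::UpdateShapeAndDataByResultSummary(TaskContext &context) {
  std::vector<std::unique_ptr<TensorBuffer>> out_shape_hbm;
  // Read each output's ResultSummary into output_summary_host_ and allocate
  // device buffers for the shape data (data is null and size 0 for scalar outputs).
  GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(context, out_shape_hbm),
                    "Read result summary and prepare memory failed.");
  // Launch the copy task that moves the real shape/data; derived classes implement the launch.
  GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm),
                    "Copy data to hbm failed.");
  // Parse the copied shape buffers and refresh the output tensor descs.
  GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm),
                    "Update shape by hbm buffer failed.");
  return SUCCESS;
}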
class AicpuTfNodeTask : public AicpuNodeTaskBase {
@@ -178,20 +178,36 @@ class AiCpuBaseTask : public OpTask {
                         rtStream_t stream);
  Status UpdateOutputShape(vector<GeTensorDesc> &output_desc);
  Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc);
  // for blocking aicpu op
  Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream);
  Status UpdateEventIdForBlockingAicpuOp();
  Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support);
  Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
                                           vector<DataBuffer> &outputs,
                                           rtStream_t stream);
  Status ReadResultSummaryAndPrepareMemory();
  Status PrepareCopyInputs(vector<DataBuffer> &outputs);
  Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc);
  virtual Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) = 0;
 protected:
  size_t num_inputs_ = 0;
@@ -213,6 +229,16 @@ class AiCpuBaseTask : public OpTask {
  // for blocking aicpu op
  bool is_blocking_aicpu_op_ = false;
  rtEvent_t rt_event_ = nullptr;
  std::vector<void *> output_summary_;
  std::vector<aicpu::FWKAdapter::ResultSummary> output_summary_host_;
  void *copy_input_release_flag_dev_ = nullptr;
  void *copy_input_data_size_dev_ = nullptr;
  void *copy_input_src_dev_ = nullptr;
  void *copy_input_dst_dev_ = nullptr;
  vector<void *> out_shape_hbm_;
};
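The single-op class mirrors the hybrid one: output_summary_ and output_summary_host_ hold the per-output ResultSummary records, and the four copy_input_*_dev_ pointers feed the copy kernel. As a hedged illustration only (not the committed body), PrepareCopyInputs could stage those arguments roughly as below; kReleaseFlag and the ResultSummary field names raw_data_ptr / raw_data_size are assumptions based on the FWKAdapter conventions, and logging is omitted.

// Hedged sketch of PrepareCopyInputs (illustrative, not this PR's implementation).
Status AiCpuBaseTask::PrepareCopyInputs(vector<DataBuffer> &outputs) {
  constexpr uint64_t kReleaseFlag = 1;  // assumption: tells aicpu to release the source buffer
  std::vector<uint64_t> release_flags;
  std::vector<uint64_t> data_sizes;
  std::vector<uint64_t> src_addrs;
  std::vector<uint64_t> dst_addrs;
  for (size_t i = 0; i < num_outputs_; ++i) {
    const auto &summary = output_summary_host_[i];
    release_flags.emplace_back(kReleaseFlag);
    data_sizes.emplace_back(summary.raw_data_size);  // bytes of the real output payload
    src_addrs.emplace_back(summary.raw_data_ptr);    // aicpu-owned source address
    dst_addrs.emplace_back(static_cast<uint64_t>(
        reinterpret_cast<uintptr_t>(outputs[i].data)));  // user-provided destination buffer
  }
  const uint64_t buf_len = num_outputs_ * sizeof(uint64_t);
  // Copy the four host-side argument lists into the pre-allocated device buffers.
  GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_, buf_len, release_flags.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT_RET(rtMemcpy(copy_input_data_size_dev_, buf_len, data_sizes.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_, buf_len, src_addrs.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_, buf_len, dst_addrs.data(), buf_len,
                         RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}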
class AiCpuTask : public AiCpuBaseTask {
@@ -394,3 +394,33 @@ TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
}
TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) {
  AiCpuCCTask task;
  rtStream_t stream = nullptr;
  task.num_inputs_ = 2;
  task.num_outputs_ = 1;
  task.input_is_const_ = {true, false};
  constexpr int total_addr = 3;
  uint32_t *addrs[total_addr] = {nullptr, nullptr, nullptr};
  task.io_addr_ = reinterpret_cast<uintptr_t *>(addrs);
  task.io_addr_num_ = total_addr;
  // Build the ext info blob (AicpuExtInfo header followed by an int32_t payload) in one
  // contiguous buffer, so the payload is not written past a stack object's flexible array member.
  const size_t ext_info_len = sizeof(ge::hybrid::AicpuExtInfo) + sizeof(int32_t);
  std::vector<char> ext_mem(ext_info_len, 0);
  auto *aicpu_ext_info = reinterpret_cast<ge::hybrid::AicpuExtInfo *>(ext_mem.data());
  aicpu_ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
  aicpu_ext_info->infoLen = sizeof(int32_t);
  int32_t type = ge::DEPEND_COMPUTE;
  memcpy_s(aicpu_ext_info->infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
  std::string ext_info_str(ext_mem.data(), ext_info_len);
  vector<DataBuffer> inputs(2, DataBuffer());
  vector<DataBuffer> outputs(1, DataBuffer());
  vector<GeTensorDesc> inputs_desc(2, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
  vector<GeTensorDesc> outputs_desc(1, GeTensorDesc(GeShape(), FORMAT_NCHW, DT_FLOAT));
  ASSERT_EQ(task.SetExtInfoAndType(ext_info_str, 0), SUCCESS);
  task.unknown_type_ = ge::DEPEND_COMPUTE;
  ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS);
  ASSERT_EQ(task.LaunchKernel(inputs_desc, inputs, outputs_desc, outputs, stream), SUCCESS);
}
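If more tests end up packing ext info by hand, a small helper along these lines (hypothetical, not part of this change) would keep the header-plus-payload layout in one place:

// Hypothetical helper: packs a single AicpuExtInfo record (header + payload) into a string,
// matching the layout handed to SetExtInfoAndType in the test above.
static std::string MakeExtInfoBlob(int32_t info_type, const void *payload, uint32_t payload_len) {
  std::string blob(sizeof(ge::hybrid::AicpuExtInfo) + payload_len, '\0');
  auto *info = reinterpret_cast<ge::hybrid::AicpuExtInfo *>(&blob[0]);
  info->infoType = info_type;
  info->infoLen = payload_len;
  memcpy_s(info->infoMsg, payload_len, payload, payload_len);
  return blob;
}

// Usage in the test above would then reduce to:
//   int32_t type = ge::DEPEND_COMPUTE;
//   std::string ext_info_str =
//       MakeExtInfoBlob(aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE, &type, sizeof(type));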