| @@ -3530,6 +3530,31 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void DavinciModel::BuildZeroCopyTasksLookupTable() { | |||||
| std::lock_guard<std::mutex> lk(lookup_table_build_lock_); | |||||
| if (lookup_table_built_) { | |||||
| return; | |||||
| } | |||||
| const auto default_label_hash = std::hash<std::string>{}(kDefaultBatchLable); | |||||
| for (auto &task : zero_copy_tasks_) { | |||||
| auto label_hash = std::hash<std::string>{}(task.GetBatchLabel()); | |||||
| auto addr2offsets = task.GetTaskArgsOffset(); | |||||
| label_hash2tasks_[label_hash].insert(&task); | |||||
| if (label_hash == default_label_hash) { | |||||
| for (auto &addr2offset : addr2offsets) { | |||||
| addr2default_label_tasks_[addr2offset.first].insert(&task); | |||||
| } | |||||
| } else { | |||||
| for (auto &addr2offset : addr2offsets) { | |||||
| addr2specific_label_tasks_[addr2offset.first].insert(&task); | |||||
| } | |||||
| } | |||||
| } | |||||
| lookup_table_built_ = true; | |||||
| } | |||||
| /// | /// | ||||
| /// @ingroup ge | /// @ingroup ge | ||||
| /// @brief Copy Data addr to model for direct use. | /// @brief Copy Data addr to model for direct use. | ||||
| @@ -3551,6 +3576,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| BuildZeroCopyTasksLookupTable(); | |||||
| for (const auto &data : data_info) { | for (const auto &data : data_info) { | ||||
| if (data.first >= blobs.size()) { // check data index. | if (data.first >= blobs.size()) { // check data index. | ||||
| REPORT_INNER_ERROR("E19999", "is_input:%d, data index:%u from model >= blobs.size:%zu from user, mode_id:%u" | REPORT_INNER_ERROR("E19999", "is_input:%d, data index:%u from model >= blobs.size:%zu from user, mode_id:%u" | ||||
| @@ -917,6 +917,7 @@ class DavinciModel { | |||||
| Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node); | Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node); | ||||
| Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); | Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); | ||||
| Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); | Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); | ||||
| void BuildZeroCopyTasksLookupTable(); | |||||
| bool is_weight_mem_has_inited_; | bool is_weight_mem_has_inited_; | ||||
| bool is_feature_map_mem_has_inited_; | bool is_feature_map_mem_has_inited_; | ||||
| @@ -1112,6 +1113,13 @@ class DavinciModel { | |||||
| // op name to attrs mapping | // op name to attrs mapping | ||||
| std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_; | std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_; | ||||
| // Lookup tables for fast search of zero copy tasks; built once under lookup_table_build_lock_. | |||||
| std::mutex lookup_table_build_lock_; | |||||
| bool lookup_table_built_{false}; | |||||
| std::unordered_map<size_t, std::unordered_set<ZeroCopyTask*>> label_hash2tasks_; | |||||
| std::unordered_map<uintptr_t, std::unordered_set<ZeroCopyTask*>> addr2specific_label_tasks_; | |||||
| std::unordered_map<uintptr_t, std::unordered_set<ZeroCopyTask*>> addr2default_label_tasks_; | |||||
| std::map<rtStream_t, rtEvent_t> stream_2_event_; | std::map<rtStream_t, rtEvent_t> stream_2_event_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -54,6 +54,10 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| const std::map<uintptr_t, std::set<size_t >>& ZeroCopyTask::GetTaskArgsOffset() const { | |||||
| return task_addr_offset_; | |||||
| } | |||||
| /** | /** | ||||
| * @ingroup ge | * @ingroup ge | ||||
| * @brief Save original data of task args. | * @brief Save original data of task args. | ||||
| @@ -46,6 +46,8 @@ class ZeroCopyTask { | |||||
| */ | */ | ||||
| ge::Status SetTaskArgsOffset(uintptr_t addr, size_t offset); | ge::Status SetTaskArgsOffset(uintptr_t addr, size_t offset); | ||||
| const std::map<uintptr_t, std::set<size_t >>& GetTaskArgsOffset() const; | |||||
| /** | /** | ||||
| * @ingroup ge | * @ingroup ge | ||||
| * @brief Is need zero copy. | * @brief Is need zero copy. | ||||