@@ -103,6 +103,7 @@ set(TRAIN_SRC_LIST
  "common/profiling/profiling_manager.cc"
  "common/dump/dump_manager.cc"
  "common/dump/dump_properties.cc"
  "common/dump/opdebug_register.cc"
  "common/dump/dump_op.cc"
  "common/profiling/ge_profiling.cc"
  "common/profiling/ge_runner_profiling.cc"
@@ -427,6 +428,7 @@ set(INFER_SRC_LIST
  "common/dump/dump_properties.cc"
  "common/dump/dump_manager.cc"
  "common/dump/dump_op.cc"
  "common/dump/opdebug_register.cc"
  "common/dump/dump_server.cc"
  "common/helper/model_cache_helper.cc"
  "ge_local_engine/engine/host_cpu_engine.cc"
@@ -104,8 +104,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties(
    uint64_t session_id) {
  std::lock_guard<std::mutex> lock(mutex_);
  // If session_id is not found in dump_properties_map_, operator[] will insert one.
  return dump_properties_map_[session_id];
  auto iter = dump_properties_map_.find(session_id);
  if (iter != dump_properties_map_.end()) {
    return iter->second;
  }
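  // Fall back to a function-local default so every caller still gets a valid
  // reference, without operator[] inserting empty entries while the lock is held.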
  static DumpProperties default_properties;
  return default_properties;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties(
@@ -219,9 +219,9 @@ Status DumpOp::LaunchDumpOp() {
  op_mapping_info.set_dump_path(dump_path);
  op_mapping_info.set_flag(kAicpuLoadFlag);
  op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
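  // Attach the dynamic model identity only when data dump is enabled; pure
  // overflow (op debug) mode leaves the model name unset.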
  if (!dynamic_model_name_.empty()) {
    op_mapping_info.set_model_id(dynamic_model_id_);
  if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
    op_mapping_info.set_model_name(dynamic_model_name_);
    op_mapping_info.set_model_id(dynamic_model_id_);
  }
  SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
| GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||
@@ -253,7 +253,7 @@ Status DumpOp::LaunchDumpOp() {
    }
    op_mapping_info.mutable_task()->Add(std::move(task));
  }
  if (dump_properties_.GetDumpMode() == kDumpAll) {
  if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) {
    auto ret = DumpOutput(task);
    if (ret != SUCCESS) {
| GELOGE(ret, "Dump output failed when in dumping all"); | |||
@@ -81,11 +81,11 @@ class DumpProperties {
  const std::string &GetEnableDumpDebug() const { return enable_dump_debug_; }

 private:
  void CopyFrom(const DumpProperties &other);
  void SetDumpDebugOptions();

  std::string enable_dump_;
  std::string enable_dump_debug_;
@@ -0,0 +1,148 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "opdebug_register.h"

namespace {
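// Assumed sizing: the runtime writes overflow status into a 2 KB DDR buffer,
// while aicpu locates that buffer through a single 8-byte pointer cell.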
const size_t kOpDebugMemorySize = 2048UL;
const size_t kDebugP2pSize = 8UL;
}  // namespace

namespace ge {
OpdebugRegister::~OpdebugRegister() {}
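// Allocates the debug buffers, registers an overflow-detection task on the whole
// model, and hands the resulting task/stream ids plus the P2P address to the
// data dumper so overflow records can be dumped later.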
Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for model overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in model overflow failed, ret: 0x%X", ret);
    return ret;
  }
  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
  auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}
void OpdebugRegister::UnregisterDebugForModel(rtModel_t model_handle) {
  rtError_t rt_ret = RT_ERROR_NONE;
  if (model_handle != nullptr) {
    GELOGD("start to call rtDebugUnRegister in model overflow.");
    rt_ret = rtDebugUnRegister(model_handle);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret);
    }
  }
  if (op_debug_addr_ != nullptr) {
    rt_ret = rtFree(op_debug_addr_);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", rt_ret);
    }
    op_debug_addr_ = nullptr;
  }
  if (p2p_debug_addr_ != nullptr) {
    rt_ret = rtFree(p2p_debug_addr_);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", rt_ret);
    }
    p2p_debug_addr_ = nullptr;
  }
  return;
}
Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for stream overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in stream overflow failed, ret: 0x%X", ret);
    return ret;
  }
  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
#ifdef ONLY_COMPILE_OPEN_SRC
  auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
#endif
  GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}
void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
  rtError_t rt_ret = RT_ERROR_NONE;
#ifdef ONLY_COMPILE_OPEN_SRC
  if (stream != nullptr) {
    GELOGD("start to call rtDebugUnRegisterForStream in stream overflow.");
    rt_ret = rtDebugUnRegisterForStream(stream);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", rt_ret);
    }
  }
#endif
  if (op_debug_addr_ != nullptr) {
    rt_ret = rtFree(op_debug_addr_);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", rt_ret);
    }
    op_debug_addr_ = nullptr;
  }
  if (p2p_debug_addr_ != nullptr) {
    rt_ret = rtFree(p2p_debug_addr_);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", rt_ret);
    }
    p2p_debug_addr_ = nullptr;
  }
  return;
}
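// Allocates the overflow buffer in DDR plus an 8-byte cell in HBM, then copies
// the buffer's device address into that cell: aicpu reads the cell (a pointer
// to a pointer) to find the real debug memory.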
Status OpdebugRegister::MallocMemForOpdebug() {
  rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
  // For data dump, aicpu needs the pointer to pointer that saves the real debug address.
  rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
}  // namespace ge
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
#define GE_COMMON_DUMP_OPDEBUG_REGISTER_H_

#include <map>
#include "common/debug/ge_log.h"
#include "common/debug/log.h"
#include "graph/load/model_manager/data_dumper.h"

namespace ge {
class OpdebugRegister {
 public:
  OpdebugRegister() = default;
  ~OpdebugRegister();

  Status RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper);
  void UnregisterDebugForModel(rtModel_t model_handle);
  Status RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper);
  void UnregisterDebugForStream(rtStream_t stream);

 private:
  Status MallocMemForOpdebug();

  void *op_debug_addr_ = nullptr;
  void *p2p_debug_addr_ = nullptr;
};
}  // namespace ge
#endif  // GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
@@ -17,6 +17,7 @@ set(SRC_LIST
  "../common/dump/dump_properties.cc"
  "../common/dump/dump_manager.cc"
  "../common/dump/dump_op.cc"
  "../common/dump/opdebug_register.cc"
  "../common/profiling/ge_profiling.cc"
  "../graph/load/graph_loader.cc"
  "../graph/execute/graph_execute.cc"
@@ -36,21 +36,9 @@
namespace ge {
class DataDumper {
 public:
  explicit DataDumper(const RuntimeParam &rsh)
      : model_name_(),
        model_id_(0),
        runtime_param_(rsh),
        dev_mem_load_(nullptr),
        dev_mem_unload_(nullptr),
        op_list_(),
        input_map_(),
        load_flag_(false),
        device_id_(0),
        global_step_(0),
        loop_per_iter_(0),
        loop_cond_(0),
        compute_graph_(nullptr),
        ref_info_() {}
  DataDumper() : runtime_param_{} {}
  explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {}
  ~DataDumper();
@@ -105,10 +93,10 @@ class DataDumper {
  // for inference data dump
  std::string om_name_;

  uint32_t model_id_;
  uint32_t model_id_ = 0;
  const RuntimeParam &runtime_param_;
  void *dev_mem_load_;
  void *dev_mem_unload_;
  void *dev_mem_load_ = nullptr;
  void *dev_mem_unload_ = nullptr;

  struct InnerDumpInfo;
  struct InnerInputMapping;
@@ -119,16 +107,15 @@ class DataDumper {
  uint32_t end_graph_stream_id_ = 0;
  bool is_end_graph_ = false;
  std::multimap<std::string, InnerInputMapping> input_map_;  // release after DavinciModel::Init
  bool load_flag_;
  uint32_t device_id_;
  uintptr_t global_step_;
  uintptr_t loop_per_iter_;
  uintptr_t loop_cond_;
  ComputeGraphPtr compute_graph_;  // release after DavinciModel::Init
  std::map<OpDescPtr, void *> ref_info_;  // release after DavinciModel::Init
  bool load_flag_ = false;
  uint32_t device_id_ = 0;
  uintptr_t global_step_ = 0;
  uintptr_t loop_per_iter_ = 0;
  uintptr_t loop_cond_ = 0;
  ComputeGraphPtr compute_graph_ = nullptr;  // release after DavinciModel::Init
  std::map<OpDescPtr, void *> ref_info_;  // release after DavinciModel::Init
  void *l1_fusion_addr_ = nullptr;
  uint32_t op_debug_task_id_ = 0;
  uint32_t op_debug_stream_id_ = 0;
  void *op_debug_addr_ = nullptr;
@@ -144,20 +131,16 @@ class DataDumper {
  Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
  Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
  Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i,
                      const std::string &node_name_index);
                      const std::string &node_name_index);
  Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
  void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info);
  void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
                           aicpu::dump::OpMappingInfo &op_mapping_info);
  Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
  Status GenerateInput(aicpu::dump::Input &input,
                       const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
                       const uintptr_t &addr,
                       size_t index);
  Status GenerateOutput(aicpu::dump::Output &output,
                        const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
                        const uintptr_t &addr,
                        size_t index);
  Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
                       const uintptr_t &addr, size_t index);
  Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
                        const uintptr_t &addr, size_t index);
  void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task);
};

struct DataDumper::InnerDumpInfo {
@@ -232,6 +232,8 @@ DavinciModel::~DavinciModel() {
    FreeP2PMem();

    OpDebugUnRegister();

    if (l1_fusion_addr_ != nullptr) {
      GE_CHK_RT(rtFree(l1_fusion_addr_));
    }
@@ -242,8 +244,6 @@ DavinciModel::~DavinciModel() {
      }
    }

    OpDebugUnRegister();

    ReleaseTask();
    CleanTbeHandle();
@@ -568,77 +568,21 @@ Status DavinciModel::SetTSDevice() {
}

Status DavinciModel::OpDebugRegister() {
  bool is_op_debug = false;
  (void)ge::AttrUtils::GetBool(ge_model_, ATTR_OP_DEBUG_FLAG, is_op_debug);
  GELOGD("The value of op debug in ge_model is %d.", is_op_debug);
  if (is_op_debug) {
    debug_reg_mutex_.lock();
    rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
    // For data dump, aicpu needs the pointer to pointer that saves the real debug address.
    rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    uint32_t op_debug_mode = 0;
    (void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode);
    GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode);
    uint32_t debug_task_id = 0;
    uint32_t debug_stream_id = 0;
    rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
  if (GetDumpProperties().IsOpDebugOpen()) {
    uint32_t op_debug_mode = GetDumpProperties().GetOpDebugMode();
    auto ret = opdebug_register_.RegisterDebugForModel(rt_model_handle_, op_debug_mode, data_dumper_);
    if (ret != SUCCESS) {
| GELOGE(ret,"Register known shape op debug failed, ret: 0x%X",ret); | |||
      return ret;
    }
    GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id);
    is_op_debug_reg_ = true;
    data_dumper_.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, is_op_debug);
  }
  return SUCCESS;
}
void DavinciModel::OpDebugUnRegister() {
  if (is_op_debug_reg_) {
    debug_reg_mutex_.unlock();
    rtError_t rt_ret = RT_ERROR_NONE;
    if (rt_model_handle_ != nullptr) {
      GELOGD("start call debug_unregister.");
      rt_ret = rtDebugUnRegister(rt_model_handle_);
      if (rt_ret != RT_ERROR_NONE) {
        GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret);
      }
    }
    if (op_debug_addr_ != nullptr) {
      rt_ret = rtFree(op_debug_addr_);
      if (rt_ret != RT_ERROR_NONE) {
        GELOGW("rtFree failed, ret: 0x%X", rt_ret);
      }
      op_debug_addr_ = nullptr;
    }
    if (p2p_debug_addr_ != nullptr) {
      rt_ret = rtFree(p2p_debug_addr_);
      if (rt_ret != RT_ERROR_NONE) {
        GELOGW("rtFree failed, ret: 0x%X", rt_ret);
      }
      p2p_debug_addr_ = nullptr;
    }
    opdebug_register_.UnregisterDebugForModel(rt_model_handle_);
    is_op_debug_reg_ = false;
  }
  return;
@@ -29,6 +29,7 @@
#include "common/helper/om_file_helper.h"
#include "common/opskernel/ge_task_info.h"
#include "common/properties_manager.h"
#include "common/dump/opdebug_register.h"
#include "common/types.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
@@ -984,6 +985,7 @@ class DavinciModel {
  int64_t maxDumpOpNum_;
  // for data dump
  DataDumper data_dumper_;
  OpdebugRegister opdebug_register_;
  uint64_t iterator_count_;
  bool is_l1_fusion_enable_;
  map<OpDescPtr, void *> saved_task_addrs_;  // release after DavinciModel::Init
@@ -1021,8 +1023,6 @@ class DavinciModel {
  // for op debug
  mutex debug_reg_mutex_;
  bool is_op_debug_reg_ = false;
  void *op_debug_addr_ = nullptr;
  void *p2p_debug_addr_ = nullptr;
  bool is_online_infer_dynamic_ = false;
  bool is_getnext_sink_dynamic_ = false;
  vector<int32_t> cur_dynamic_dims_;
@@ -85,6 +85,10 @@ Status HybridModelAsyncExecutor::Stop() {
    ret = future_.get();
  }

  if (is_op_debug_reg_) {
    op_debug_register_.UnregisterDebugForStream(stream_);
  }

  if (stream_ != nullptr) {
    GE_CHK_RT(rtStreamDestroy(stream_));
    stream_ = nullptr;
@@ -101,6 +105,7 @@ Status HybridModelAsyncExecutor::Init() {
  executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
  GE_CHECK_NOTNULL(executor_);
  GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
  GE_CHK_STATUS_RET(DumpOpDebug(), "Dump op debug failed in hybrid engine");

  GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
  if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
@@ -508,5 +513,40 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<
  return SUCCESS;
}
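// Registers overflow debugging on the executor stream when op debug is enabled,
// then primes the data dumper with the model identity and flow-control loop
// addresses so aicpu can dump overflow records per iteration.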
Status HybridModelAsyncExecutor::DumpOpDebug() {
  const DumpProperties &dump_properties = executor_->GetContext()->dump_properties;
  if (dump_properties.IsOpDebugOpen()) {
    GELOGD("Opdebug is open in hybrid engine");
    uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
    GE_CHK_STATUS_RET(op_debug_register_.RegisterDebugForStream(stream_, op_debug_mode, data_dumper_),
                      "Register op debug for stream failed in hybrid engine");
    is_op_debug_reg_ = true;
    data_dumper_.SetDumpProperties(dump_properties);
    data_dumper_.SetModelName(model_->GetModelName());
    data_dumper_.SetModelId(model_->GetModelId());
    data_dumper_.SetDeviceId(model_->GetDeviceId());
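    // Flow-control variables (global step, loop counters) exist only for some
    // graphs; when a variable is absent its address simply stays null.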
    void *global_step = nullptr;
    TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP);
    if (varible_global_step != nullptr) {
      global_step = const_cast<void *>(varible_global_step->GetData());
    }

    void *loop_per_iter = nullptr;
    TensorValue *varible_loop_per_iter = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
    if (varible_loop_per_iter != nullptr) {
      loop_per_iter = const_cast<void *>(varible_loop_per_iter->GetData());
    }

    void *loop_cond = nullptr;
    TensorValue *varible_loop_cond = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);
    if (varible_loop_cond != nullptr) {
      loop_cond = const_cast<void *>(varible_loop_cond->GetData());
    }
    data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);

    GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
    GELOGD("Dump op debug SUCCESS in hybrid engine");
  }
  return SUCCESS;
}
}  // namespace hybrid
}  // namespace ge
@@ -21,7 +21,9 @@
#include <future>
#include "external/ge/ge_api_error_codes.h"
#include "external/ge/ge_api_types.h"
#include "common/dump/opdebug_register.h"
#include "graph/load/model_manager/data_inputer.h"
#include "graph/load/model_manager/data_dumper.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/executor/hybrid_model_pipeline_executor.h"
#include "runtime/stream.h"
@@ -77,6 +79,8 @@ class HybridModelAsyncExecutor {
  Status PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args);

  Status DumpOpDebug();

  std::mutex mu_;
  HybridModel *model_;
  uint32_t device_id_ = 0U;
@@ -94,6 +98,9 @@ class HybridModelAsyncExecutor {
  std::vector<bool> is_input_dynamic_;
  std::shared_ptr<ModelListener> listener_;
  string om_name_;
  DataDumper data_dumper_;
  bool is_op_debug_reg_ = false;
  OpdebugRegister op_debug_register_;
};
}  // namespace hybrid
}  // namespace ge
@@ -266,9 +266,9 @@ Status NodeDoneCallback::OnNodeDone() {
  RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Compute] End");
  RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Callback] Start");

  auto dump_path = context_->GetDumpProperties().GetDumpPath();
  if (!dump_path.empty()) {
    GELOGI("Start to dump dynamic shape,dump_path is %s", dump_path.c_str());
  const DumpProperties &dump_properties = context_->GetDumpProperties();
  if (dump_properties.IsDumpOpen() || context_->IsOverFlow()) {
    GELOGI("Start to dump dynamic shape op");
    GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node");
  }
@@ -61,6 +61,10 @@ class HybridModel {
    device_id_ = device_id;
  }

  uint32_t GetDeviceId() {
    return device_id_;
  }

  void SetModelId(uint32_t model_id) {
    model_id_ = model_id;
  }
@@ -17,6 +17,7 @@
#include "aicore_node_executor.h"
#include "framework/common/taskdown_common.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "external/runtime/rt_error_codes.h"

namespace ge {
namespace hybrid {
@@ -189,6 +190,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
    }
    RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
    GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
    GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context));
    // save profiling data
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
@@ -259,6 +261,25 @@ void AiCoreNodeTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) {
  workspace_sizes_ = workspace_sizes;
}
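// Overflow checking rides on a stream synchronize right after kernel launch:
// ACL_ERROR_RT_AICORE_OVER_FLOW marks the node as overflowed so its data is
// dumped in the done-callback, rather than failing the whole task.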
Status AiCoreNodeTask::CheckOverflow(TaskContext &context) {
  const DumpProperties &dump_properties = context.GetDumpProperties();
  if (dump_properties.IsOpDebugOpen()) {
    GELOGD("Op %s is doing overflow check in hybrid engine", context.GetNodeName());
    auto rt_ret = rtStreamSynchronize(context.GetStream());
    if (rt_ret == ACL_ERROR_RT_AICORE_OVER_FLOW) {
      context.SetOverFlow(true);
      GELOGW("Dynamic shape op %s overflowed", context.GetNodeName());
      return SUCCESS;
    } else if (rt_ret != RT_ERROR_NONE) {
      GELOGE(rt_ret, "rtStreamSynchronize failed");
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    return SUCCESS;
  }
  GELOGD("Opdebug is not open in hybrid engine");
  return SUCCESS;
}
TaskCompilerFactory &TaskCompilerFactory::GetInstance() {
  static TaskCompilerFactory instance;
  return instance;
@@ -62,6 +62,7 @@ class AiCoreNodeTask : public NodeTask {
  const vector<int64_t> &GetWorkspaceSizes() const;
  void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);

 private:
  Status CheckOverflow(TaskContext &context);
  std::vector<std::unique_ptr<AiCoreOpTask>> tasks_;
  std::vector<int64_t> workspace_sizes_;
};
@@ -124,7 +124,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
  }
  if (!load_flag_) {
    auto dump_properties = context.GetDumpProperties();
    if (dump_properties.IsDumpOpen()) {
    if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
      davinci_model_->SetDumpProperties(dump_properties);
      void *global_step = nullptr;
      TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP);
@@ -350,6 +350,14 @@ void TaskContext::SetStreamId(uint32_t stream_id) {
  stream_id_ = stream_id;
}

void TaskContext::SetOverFlow(bool is_over_flow) {
  is_over_flow_ = is_over_flow;
}

bool TaskContext::IsOverFlow() {
  return is_over_flow_;
}
Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
  GE_CHECK_NOTNULL(buffer);
  if (ori_addr == nullptr) {
@@ -65,6 +65,7 @@ class TaskContext {
  int64_t GetSessionId() const;
  uint64_t GetIterationNumber() const;

  void NodeDone();
  void OnError(Status error);
@@ -106,6 +107,9 @@ class TaskContext {
  uint32_t GetStreamId() const;
  void SetStreamId(uint32_t stream_id);

  void SetOverFlow(bool is_over_flow);
  bool IsOverFlow();

  Status Synchronize();

  bool IsForceInferShape() const;
@@ -138,6 +142,7 @@ class TaskContext {
  uint32_t task_id_ = 0;
  uint32_t stream_id_ = 0;
  std::vector<TaskDescInfo> task_desc_info;
  bool is_over_flow_ = false;
};
}  // namespace hybrid
}  // namespace ge
@@ -491,21 +491,18 @@ Status AiCpuBaseTask::UpdateOutputShape(vector<GeTensorDesc> &output_desc) {
  }
  GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape.");

  GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(),
                         aicpu_ext_handle_->GetExtInfoLen(),
                         ext_info_addr_dev_,
                         aicpu_ext_handle_->GetExtInfoLen(),
                         RT_MEMCPY_DEVICE_TO_HOST));
  GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_,
                         aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST));

  for (size_t i = 0; i < num_outputs_; ++i) {
    GeShape shape;
    DataType data_type;
    aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type);
    GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]),
                      "AiCpuCCTask Update [%zu]th output shape failed.", i);
    GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.",
                      i);
    if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
                        "AiCpuCCTask Update [%zu]th output desc failed.", i);
      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuCCTask Update [%zu]th output desc failed.",
                        i);
    }
  }
  GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished.");
@@ -697,10 +694,10 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
      const auto &shape_hbm = out_shape_hbm_[i];

      uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
      std::unique_ptr<int64_t[]> shape_addr(new(std::nothrow) int64_t[dim_num]());
      std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
      GE_CHECK_NOTNULL(shape_addr);
      GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size,
                             shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));
      GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm,
                             result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));

      for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
        shape_dims.emplace_back(shape_addr[dim_idx]);
@@ -711,13 +708,14 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
    GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]),
                      "AiCpuTask update [%zu]th output shape failed.", i);
    if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
                        "AiCpuTask update [%zu]th output desc failed.", i);
      GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuTask update [%zu]th output desc failed.",
                        i);
    }
  }
  return SUCCESS;
}

Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
                                                    vector<DataBuffer> &outputs,
                                                    rtStream_t stream) {
@@ -431,3 +431,7 @@ rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId)
{
  return RT_ERROR_NONE;
}
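// Stub for the new stream-level debug API so unit tests can link without a real
// device runtime; it accepts the call and reports success.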
rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId) {
  return RT_ERROR_NONE;
}
@@ -162,6 +162,7 @@ set(COMMON_SRC_FILES
  "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc"
  "${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
  "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
  "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc"
  "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
  "${GE_CODE_DIR}/ge/model/ge_root_model.cc"
  "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
@@ -733,6 +734,7 @@ set(MULTI_PARTS_TEST_FILES
  "graph/transop_util_unittest.cc"
  "common/datatype_transfer_unittest.cc"
  "common/dump_manager_unittest.cc"
  "common/opdebug_register_unittest.cc"
  "common/format_transfer_unittest.cc"
  "common/format_transfer_transpose_unittest.cc"
  "common/format_transfer_nchw_5d_unittest.cc"
@@ -0,0 +1,51 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <gtest/gtest.h>

#include "common/dump/opdebug_register.h"
#include "common/debug/log.h"
#include "common/ge_inner_error_codes.h"

namespace ge {
class UTEST_opdebug_register : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
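// Both cases exercise the full register/unregister path; they rely on the
// runtime stubs returning RT_ERROR_NONE, so SUCCESS is expected even without
// a real device.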
TEST_F(UTEST_opdebug_register, register_debug_for_model_success) {
  OpdebugRegister opdebug_register;
  rtModel_t model_handle = (void *)0x111;
  uint32_t op_debug_mode = 1;
  DataDumper data_dumper;
  auto ret = opdebug_register.RegisterDebugForModel(model_handle, op_debug_mode, data_dumper);
  opdebug_register.UnregisterDebugForModel(model_handle);
  EXPECT_EQ(ret, ge::SUCCESS);
}

TEST_F(UTEST_opdebug_register, register_debug_for_stream_success) {
  OpdebugRegister opdebug_register;
  rtStream_t stream = (void *)0x111;
  uint32_t op_debug_mode = 1;
  DataDumper data_dumper;
  auto ret = opdebug_register.RegisterDebugForStream(stream, op_debug_mode, data_dumper);
  opdebug_register.UnregisterDebugForStream(stream);
  EXPECT_EQ(ret, ge::SUCCESS);
}
}  // namespace ge