From: @zhou_chao1993 Reviewed-by: @xchu42, @ji_chen Signed-off-by: @ji_chen tags/v1.2.0
@@ -103,6 +103,7 @@ set(TRAIN_SRC_LIST | |||
"common/profiling/profiling_manager.cc" | |||
"common/dump/dump_manager.cc" | |||
"common/dump/dump_properties.cc" | |||
"common/dump/opdebug_register.cc" | |||
"common/dump/dump_op.cc" | |||
"common/profiling/ge_profiling.cc" | |||
"common/profiling/ge_runner_profiling.cc" | |||
@@ -427,6 +428,7 @@ set(INFER_SRC_LIST | |||
"common/dump/dump_properties.cc" | |||
"common/dump/dump_manager.cc" | |||
"common/dump/dump_op.cc" | |||
"common/dump/opdebug_register.cc" | |||
"common/dump/dump_server.cc" | |||
"common/helper/model_cache_helper.cc" | |||
"ge_local_engine/engine/host_cpu_engine.cc" | |||
@@ -104,8 +104,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf | |||
// Returns the dump properties registered for `session_id`.
//
// The lookup is done under mutex_. When no entry exists for the session, a reference to a
// function-local static default-constructed DumpProperties is returned instead, so the map is
// never modified by a read.
//
// NOTE(review): the returned reference is used by callers after mutex_ has been released;
// confirm that entries are not erased or replaced while such references are still in use.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties(
    uint64_t session_id) {
  std::lock_guard<std::mutex> lock(mutex_);
  // Use find() rather than operator[]: operator[] would insert an empty entry for every
  // session id that is merely queried.
  auto iter = dump_properties_map_.find(session_id);
  if (iter != dump_properties_map_.end()) {
    return iter->second;
  }
  // Shared fallback handed out for every unknown session.
  static DumpProperties default_properties;
  return default_properties;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties( | |||
@@ -219,9 +219,9 @@ Status DumpOp::LaunchDumpOp() { | |||
op_mapping_info.set_dump_path(dump_path); | |||
op_mapping_info.set_flag(kAicpuLoadFlag); | |||
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | |||
if (!dynamic_model_name_.empty()) { | |||
op_mapping_info.set_model_id(dynamic_model_id_); | |||
if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) { | |||
op_mapping_info.set_model_name(dynamic_model_name_); | |||
op_mapping_info.set_model_id(dynamic_model_id_); | |||
} | |||
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | |||
@@ -253,7 +253,7 @@ Status DumpOp::LaunchDumpOp() { | |||
} | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
} | |||
if (dump_properties_.GetDumpMode() == kDumpAll) { | |||
if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) { | |||
auto ret = DumpOutput(task); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Dump output failed when in dumping all"); | |||
@@ -81,11 +81,11 @@ class DumpProperties { | |||
const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;} | |||
private: | |||
void CopyFrom(const DumpProperties &other); | |||
void SetDumpDebugOptions(); | |||
std::string enable_dump_; | |||
std::string enable_dump_debug_; | |||
@@ -0,0 +1,148 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "opdebug_register.h" | |||
namespace {
// Byte size requested for the op-debug buffer (op_debug_addr_).
constexpr size_t kOpDebugMemorySize = 2048UL;
// Byte size requested for the buffer that stores the 64-bit address of the op-debug buffer
// (p2p_debug_addr_).
constexpr size_t kDebugP2pSize = 8UL;
}  // namespace
namespace ge { | |||
// Empty destructor: op_debug_addr_ and p2p_debug_addr_ are NOT freed here. The only frees in this
// file are in UnregisterDebugForModel / UnregisterDebugForStream.
// NOTE(review): presumably owners must call the matching Unregister* before destruction - confirm.
OpdebugRegister::~OpdebugRegister() {} | |||
// Allocates the op-debug buffers, registers op debug on a model through rtDebugRegister and
// stores the resulting task id, stream id and p2p debug address in the data dumper.
//
// @param model_handle   runtime model handle passed to rtDebugRegister
// @param op_debug_mode  mode value forwarded to rtDebugRegister
// @param data_dumper    receives the ids and p2p_debug_addr_ via SaveOpDebugId
// @return SUCCESS on success; otherwise the status of the failing allocation or the converted
//         runtime error. On every failure path the buffers are released again, so a caller
//         that skips UnregisterDebugForModel after a failed registration does not leak them.
Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for model in overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
    // Release anything allocated before the failure. A null handle makes
    // UnregisterDebugForModel skip rtDebugUnRegister and only free the buffers.
    UnregisterDebugForModel(nullptr);
    return ret;
  }
  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
  auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
    // Registration did not succeed, so only the buffers have to be given back.
    UnregisterDebugForModel(nullptr);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}
// Unregisters op debug from a model and releases both debug buffers.
//
// rtDebugUnRegister is called only for a non-null model_handle. Runtime failures are logged as
// warnings and do not stop the cleanup; each buffer pointer is reset to nullptr after rtFree.
void OpdebugRegister::UnregisterDebugForModel(rtModel_t model_handle) {
  if (model_handle != nullptr) {
    GELOGD("start to call rtDebugUnRegister in model overflow.");
    const rtError_t unregister_ret = rtDebugUnRegister(model_handle);
    if (unregister_ret != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegister failed, ret: 0x%X", unregister_ret);
    }
  }

  // Same release sequence for both buffers, op-debug buffer first.
  void **const owned_buffers[] = {&op_debug_addr_, &p2p_debug_addr_};
  for (void **buffer : owned_buffers) {
    if (*buffer == nullptr) {
      continue;
    }
    const rtError_t free_ret = rtFree(*buffer);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    *buffer = nullptr;
  }
}
// Allocates the op-debug buffers, registers op debug on a stream and stores the resulting task
// id, stream id and p2p debug address in the data dumper.
//
// The rtDebugRegisterForStream call is compiled only when ONLY_COMPILE_OPEN_SRC is defined;
// otherwise both ids stay 0 and only the buffers are set up.
//
// @param stream         runtime stream passed to rtDebugRegisterForStream
// @param op_debug_mode  mode value forwarded to rtDebugRegisterForStream
// @param data_dumper    receives the ids and p2p_debug_addr_ via SaveOpDebugId
// @return SUCCESS on success; otherwise the status of the failing allocation or the converted
//         runtime error. On every failure path the buffers are released again, so a caller
//         that skips UnregisterDebugForStream after a failed registration does not leak them.
Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for stream in stream overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in stream overflow failed ,ret:0x%X", ret);
    // Release anything allocated before the failure. A null stream makes
    // UnregisterDebugForStream skip the runtime unregister call and only free the buffers.
    UnregisterDebugForStream(nullptr);
    return ret;
  }

  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
#ifdef ONLY_COMPILE_OPEN_SRC
  auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
    // Registration did not succeed, so only the buffers have to be given back.
    UnregisterDebugForStream(nullptr);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
#endif
  GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}
// Unregisters op debug from a stream and releases both debug buffers.
//
// rtDebugUnRegisterForStream is compiled in only when ONLY_COMPILE_OPEN_SRC is defined and is
// called only for a non-null stream. Runtime failures are logged as warnings and do not stop the
// cleanup; each buffer pointer is reset to nullptr after rtFree.
void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
  rtError_t status = RT_ERROR_NONE;
#ifdef ONLY_COMPILE_OPEN_SRC
  if (stream != nullptr) {
    GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
    status = rtDebugUnRegisterForStream(stream);
    if (status != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", status);
    }
  }
#endif

  // Same release sequence for both buffers, op-debug buffer first.
  void **const owned_buffers[] = {&op_debug_addr_, &p2p_debug_addr_};
  for (void **buffer : owned_buffers) {
    if (*buffer == nullptr) {
      continue;
    }
    status = rtFree(*buffer);
    if (status != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", status);
    }
    *buffer = nullptr;
  }
}
// Allocates the two buffers used for op debug:
//   - op_debug_addr_  : kOpDebugMemorySize bytes (RT_MEMORY_DDR)
//   - p2p_debug_addr_ : kDebugP2pSize bytes (RT_MEMORY_HBM), filled with the 64-bit address of
//                       op_debug_addr_ via a host-to-device rtMemcpy
//
// @return SUCCESS when both buffers are allocated and the address has been copied; otherwise the
//         converted runtime error. Buffers allocated earlier in the same call are freed again
//         before an error is returned, so a failed call does not leave a partial allocation.
Status OpdebugRegister::MallocMemForOpdebug() {
  // Frees one buffer (if set) and clears the pointer; used to roll back a partial allocation.
  const auto release = [](void *&addr) {
    if (addr == nullptr) {
      return;
    }
    const rtError_t free_ret = rtFree(addr);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    addr = nullptr;
  };

  rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  const uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
  // For data dump, aicpu needs the pointer to pointer that save the real debug address.
  rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
    release(op_debug_addr_);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
    release(p2p_debug_addr_);
    release(op_debug_addr_);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
} // namespace ge |
@@ -0,0 +1,44 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ | |||
#define GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ | |||
#include <map> | |||
#include "common/debug/ge_log.h" | |||
#include "common/debug/log.h" | |||
#include "graph/load/model_manager/data_dumper.h" | |||
namespace ge { | |||
class OpdebugRegister { | |||
public: | |||
OpdebugRegister() = default; | |||
~OpdebugRegister(); | |||
Status RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper); | |||
void UnregisterDebugForModel(rtModel_t model_handle); | |||
Status RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper); | |||
void UnregisterDebugForStream(rtStream_t stream); | |||
private: | |||
Status MallocMemForOpdebug(); | |||
void *op_debug_addr_ = nullptr; | |||
void *p2p_debug_addr_ = nullptr; | |||
}; | |||
} // namespace ge | |||
#endif // GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ |
@@ -17,6 +17,7 @@ set(SRC_LIST | |||
"../common/dump/dump_properties.cc" | |||
"../common/dump/dump_manager.cc" | |||
"../common/dump/dump_op.cc" | |||
"../common/dump/opdebug_register.cc" | |||
"../common/profiling/ge_profiling.cc" | |||
"../graph/load/graph_loader.cc" | |||
"../graph/execute/graph_execute.cc" | |||
@@ -36,21 +36,9 @@ | |||
namespace ge { | |||
class DataDumper { | |||
public: | |||
explicit DataDumper(const RuntimeParam &rsh) | |||
: model_name_(), | |||
model_id_(0), | |||
runtime_param_(rsh), | |||
dev_mem_load_(nullptr), | |||
dev_mem_unload_(nullptr), | |||
op_list_(), | |||
input_map_(), | |||
load_flag_(false), | |||
device_id_(0), | |||
global_step_(0), | |||
loop_per_iter_(0), | |||
loop_cond_(0), | |||
compute_graph_(nullptr), | |||
ref_info_() {} | |||
DataDumper() : runtime_param_{} {} | |||
explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {} | |||
~DataDumper(); | |||
@@ -105,10 +93,10 @@ class DataDumper { | |||
// for inference data dump | |||
std::string om_name_; | |||
uint32_t model_id_; | |||
uint32_t model_id_ = 0; | |||
const RuntimeParam &runtime_param_; | |||
void *dev_mem_load_; | |||
void *dev_mem_unload_; | |||
void *dev_mem_load_ = nullptr; | |||
void *dev_mem_unload_ = nullptr; | |||
struct InnerDumpInfo; | |||
struct InnerInputMapping; | |||
@@ -119,16 +107,15 @@ class DataDumper { | |||
uint32_t end_graph_stream_id_ = 0; | |||
bool is_end_graph_ = false; | |||
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init | |||
bool load_flag_; | |||
uint32_t device_id_; | |||
uintptr_t global_step_; | |||
uintptr_t loop_per_iter_; | |||
uintptr_t loop_cond_; | |||
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init | |||
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||
bool load_flag_ = false; | |||
uint32_t device_id_ = 0; | |||
uintptr_t global_step_ = 0; | |||
uintptr_t loop_per_iter_ = 0; | |||
uintptr_t loop_cond_ = 0; | |||
ComputeGraphPtr compute_graph_ = nullptr; // release after DavinciModel::Init | |||
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||
void *l1_fusion_addr_ = nullptr; | |||
uint32_t op_debug_task_id_ = 0; | |||
uint32_t op_debug_stream_id_ = 0; | |||
void *op_debug_addr_ = nullptr; | |||
@@ -144,20 +131,16 @@ class DataDumper { | |||
Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | |||
Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | |||
Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, | |||
const std::string &node_name_index); | |||
const std::string &node_name_index); | |||
Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | |||
void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); | |||
void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, | |||
aicpu::dump::OpMappingInfo &op_mapping_info); | |||
Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | |||
Status GenerateInput(aicpu::dump::Input &input, | |||
const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, | |||
size_t index); | |||
Status GenerateOutput(aicpu::dump::Output &output, | |||
const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, | |||
size_t index); | |||
Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, size_t index); | |||
Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, size_t index); | |||
void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); | |||
}; | |||
struct DataDumper::InnerDumpInfo { | |||
@@ -232,6 +232,8 @@ DavinciModel::~DavinciModel() { | |||
FreeP2PMem(); | |||
OpDebugUnRegister(); | |||
if (l1_fusion_addr_ != nullptr) { | |||
GE_CHK_RT(rtFree(l1_fusion_addr_)); | |||
} | |||
@@ -242,8 +244,6 @@ DavinciModel::~DavinciModel() { | |||
} | |||
} | |||
OpDebugUnRegister(); | |||
ReleaseTask(); | |||
CleanTbeHandle(); | |||
@@ -568,77 +568,21 @@ Status DavinciModel::SetTSDevice() { | |||
} | |||
Status DavinciModel::OpDebugRegister() { | |||
bool is_op_debug = false; | |||
(void)ge::AttrUtils::GetBool(ge_model_, ATTR_OP_DEBUG_FLAG, is_op_debug); | |||
GELOGD("The value of op debug in ge_model is %d.", is_op_debug); | |||
if (is_op_debug) { | |||
debug_reg_mutex_.lock(); | |||
rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_)); | |||
// For data dump, aicpu needs the pointer to pointer that save the real debug address. | |||
rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
uint32_t op_debug_mode = 0; | |||
(void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode); | |||
GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode); | |||
uint32_t debug_task_id = 0; | |||
uint32_t debug_stream_id = 0; | |||
rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
if (GetDumpProperties().IsOpDebugOpen()) { | |||
uint32_t op_debug_mode = GetDumpProperties().GetOpDebugMode(); | |||
auto ret = opdebug_register_.RegisterDebugForModel(rt_model_handle_, op_debug_mode, data_dumper_); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret,"Register known shape op debug failed, ret: 0x%X",ret); | |||
return ret; | |||
} | |||
GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id); | |||
is_op_debug_reg_ = true; | |||
data_dumper_.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, is_op_debug); | |||
} | |||
return SUCCESS; | |||
} | |||
void DavinciModel::OpDebugUnRegister() { | |||
if (is_op_debug_reg_) { | |||
debug_reg_mutex_.unlock(); | |||
rtError_t rt_ret = RT_ERROR_NONE; | |||
if (rt_model_handle_ != nullptr) { | |||
GELOGD("start call debug_unregister."); | |||
rt_ret = rtDebugUnRegister(rt_model_handle_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret); | |||
} | |||
} | |||
if (op_debug_addr_ != nullptr) { | |||
rt_ret = rtFree(op_debug_addr_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("rtFree failed, ret: 0x%X", rt_ret); | |||
} | |||
op_debug_addr_ = nullptr; | |||
} | |||
if (p2p_debug_addr_ != nullptr) { | |||
rt_ret = rtFree(p2p_debug_addr_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("rtFree failed, ret: 0x%X", rt_ret); | |||
} | |||
p2p_debug_addr_ = nullptr; | |||
} | |||
opdebug_register_.UnregisterDebugForModel(rt_model_handle_); | |||
is_op_debug_reg_ = false; | |||
} | |||
return; | |||
@@ -29,6 +29,7 @@ | |||
#include "common/helper/om_file_helper.h" | |||
#include "common/opskernel/ge_task_info.h" | |||
#include "common/properties_manager.h" | |||
#include "common/dump/opdebug_register.h" | |||
#include "common/types.h" | |||
#include "framework/common/util.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
@@ -984,6 +985,7 @@ class DavinciModel { | |||
int64_t maxDumpOpNum_; | |||
// for data dump | |||
DataDumper data_dumper_; | |||
OpdebugRegister opdebug_register_; | |||
uint64_t iterator_count_; | |||
bool is_l1_fusion_enable_; | |||
map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init | |||
@@ -1021,8 +1023,6 @@ class DavinciModel { | |||
// for op debug | |||
mutex debug_reg_mutex_; | |||
bool is_op_debug_reg_ = false; | |||
void *op_debug_addr_ = nullptr; | |||
void *p2p_debug_addr_ = nullptr; | |||
bool is_online_infer_dynamic_ = false; | |||
bool is_getnext_sink_dynamic_ = false; | |||
vector<int32_t> cur_dynamic_dims_; | |||
@@ -85,6 +85,10 @@ Status HybridModelAsyncExecutor::Stop() { | |||
ret = future_.get(); | |||
} | |||
if (is_op_debug_reg_) { | |||
op_debug_register_.UnregisterDebugForStream(stream_); | |||
} | |||
if (stream_ != nullptr) { | |||
GE_CHK_RT(rtStreamDestroy(stream_)); | |||
stream_ = nullptr; | |||
@@ -101,6 +105,7 @@ Status HybridModelAsyncExecutor::Init() { | |||
executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); | |||
GE_CHECK_NOTNULL(executor_); | |||
GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); | |||
GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine"); | |||
GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups()); | |||
if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) { | |||
@@ -508,5 +513,40 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector< | |||
return SUCCESS; | |||
} | |||
// Sets up op debug for the hybrid executor when it is enabled in the dump properties.
//
// When IsOpDebugOpen() is false this is a no-op that returns SUCCESS. Otherwise it registers op
// debug on stream_, marks the executor as registered, fills data_dumper_ with the model name,
// model id, device id and loop-control variable addresses, and loads the dump info.
//
// @return SUCCESS, or the status of the failing registration / LoadDumpInfo call.
Status HybridModelAsyncExecutor::DumpOpDebug() {
  const DumpProperties &dump_properties = executor_->GetContext()->dump_properties;
  if (!dump_properties.IsOpDebugOpen()) {
    return SUCCESS;
  }

  GELOGD("Opdebug is open in hybrid engine");
  uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
  // RegisterDebugForStream returns a ge Status, not a runtime error code, so it is checked with
  // the status macro and its value is returned unchanged.
  GE_CHK_STATUS_RET(op_debug_register_.RegisterDebugForStream(stream_, op_debug_mode, data_dumper_),
                    "Register op debug for stream failed in hybrid engine");
  is_op_debug_reg_ = true;

  data_dumper_.SetDumpProperties(dump_properties);
  data_dumper_.SetModelName(model_->GetModelName());
  data_dumper_.SetModelId(model_->GetModelId());
  data_dumper_.SetDeviceId(model_->GetDeviceId());

  // Each loop-control variable is optional: a missing variable leaves its address as nullptr.
  void *global_step = nullptr;
  TensorValue *variable_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP);
  if (variable_global_step != nullptr) {
    global_step = const_cast<void *>(variable_global_step->GetData());
  }

  void *loop_per_iter = nullptr;
  TensorValue *variable_loop_per_iter = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
  if (variable_loop_per_iter != nullptr) {
    loop_per_iter = const_cast<void *>(variable_loop_per_iter->GetData());
  }

  void *loop_cond = nullptr;
  TensorValue *variable_loop_cond = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);
  if (variable_loop_cond != nullptr) {
    loop_cond = const_cast<void *>(variable_loop_cond->GetData());
  }
  data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);

  GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
  GELOGD("Dump op debug SUCCESS in hybrid engine");
  return SUCCESS;
}
} // namespace hybrid | |||
} // namespace ge |
@@ -21,7 +21,9 @@ | |||
#include <future> | |||
#include "external/ge/ge_api_error_codes.h" | |||
#include "external/ge/ge_api_types.h" | |||
#include "common/dump/opdebug_register.h" | |||
#include "graph/load/model_manager/data_inputer.h" | |||
#include "graph/load/model_manager/data_dumper.h" | |||
#include "hybrid/executor/hybrid_model_executor.h" | |||
#include "hybrid/executor/hybrid_model_pipeline_executor.h" | |||
#include "runtime/stream.h" | |||
@@ -77,6 +79,8 @@ class HybridModelAsyncExecutor { | |||
Status PrepareInputs(const InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); | |||
Status DumpOpDebug(); | |||
std::mutex mu_; | |||
HybridModel *model_; | |||
uint32_t device_id_ = 0U; | |||
@@ -94,6 +98,9 @@ class HybridModelAsyncExecutor { | |||
std::vector<bool> is_input_dynamic_; | |||
std::shared_ptr<ModelListener> listener_; | |||
string om_name_; | |||
DataDumper data_dumper_; | |||
bool is_op_debug_reg_ = false; | |||
OpdebugRegister op_debug_register_; | |||
}; | |||
} // namespace hybrid | |||
} // namespace ge | |||
@@ -266,9 +266,9 @@ Status NodeDoneCallback::OnNodeDone() { | |||
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Compute] End"); | |||
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Callback] Start"); | |||
auto dump_path = context_->GetDumpProperties().GetDumpPath(); | |||
if (!dump_path.empty()) { | |||
GELOGI("Start to dump dynamic shape,dump_path is %s", dump_path.c_str()); | |||
const DumpProperties &dump_properties = context_->GetDumpProperties(); | |||
if (dump_properties.IsDumpOpen() || context_->IsOverFlow()) { | |||
GELOGI("Start to dump dynamic shape op"); | |||
GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node"); | |||
} | |||
@@ -61,6 +61,10 @@ class HybridModel { | |||
device_id_ = device_id; | |||
} | |||
uint32_t GetDeviceId() { | |||
return device_id_; | |||
} | |||
// Stores the given model id in model_id_.
void SetModelId(uint32_t model_id) { | |||
model_id_ = model_id; | |||
} | |||
@@ -17,6 +17,7 @@ | |||
#include "aicore_node_executor.h" | |||
#include "framework/common/taskdown_common.h" | |||
#include "hybrid/executor/hybrid_execution_context.h" | |||
#include "external/runtime/rt_error_codes.h" | |||
namespace ge { | |||
namespace hybrid { | |||
@@ -189,6 +190,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||
} | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | |||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | |||
GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | |||
// save profiling data | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
@@ -259,6 +261,25 @@ void AiCoreNodeTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) { | |||
workspace_sizes_ = workspace_sizes; | |||
} | |||
// Checks for an AI Core overflow after a kernel launch when op debug is enabled.
//
// With op debug closed this only logs and returns SUCCESS. Otherwise it synchronizes the
// context's stream: an ACL_ERROR_RT_AICORE_OVER_FLOW result sets the overflow flag on the
// context and is still reported as SUCCESS; any other runtime error is logged and returned as a
// converted status.
Status AiCoreNodeTask::CheckOverflow(TaskContext &context) {
  if (!context.GetDumpProperties().IsOpDebugOpen()) {
    GELOGD("Opdebug is not open in hybrid engine");
    return SUCCESS;
  }

  GELOGD("Op %s is doing overflow check in hybrid engine", context.GetNodeName());
  const auto sync_ret = rtStreamSynchronize(context.GetStream());
  if (sync_ret == ACL_ERROR_RT_AICORE_OVER_FLOW) {
    // Overflow is recorded on the context rather than treated as a failure.
    context.SetOverFlow(true);
    GELOGW("Dynamic shape op %s is over flow", context.GetNodeName());
    return SUCCESS;
  }
  if (sync_ret != RT_ERROR_NONE) {
    GELOGE(sync_ret, "rtstreamsynchronize failed");
    return RT_ERROR_TO_GE_STATUS(sync_ret);
  }
  return SUCCESS;
}
TaskCompilerFactory &TaskCompilerFactory::GetInstance() { | |||
static TaskCompilerFactory instance; | |||
return instance; | |||
@@ -62,6 +62,7 @@ class AiCoreNodeTask : public NodeTask { | |||
const vector<int64_t> &GetWorkspaceSizes() const; | |||
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | |||
private: | |||
Status CheckOverflow(TaskContext &context); | |||
std::vector<std::unique_ptr<AiCoreOpTask>> tasks_; | |||
std::vector<int64_t> workspace_sizes_; | |||
}; | |||
@@ -124,7 +124,7 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||
} | |||
if (!load_flag_) { | |||
auto dump_properties = context.GetDumpProperties(); | |||
if (dump_properties.IsDumpOpen()) { | |||
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||
davinci_model_->SetDumpProperties(dump_properties); | |||
void *global_step = nullptr; | |||
TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP); | |||
@@ -350,6 +350,14 @@ void TaskContext::SetStreamId(uint32_t stream_id) { | |||
stream_id_ = stream_id; | |||
} | |||
// Stores the overflow flag for this task context in is_over_flow_.
void TaskContext::SetOverFlow(bool is_over_flow) { | |||
is_over_flow_ = is_over_flow; | |||
} | |||
// Returns the overflow flag currently stored in is_over_flow_.
bool TaskContext::IsOverFlow() { | |||
return is_over_flow_; | |||
} | |||
Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) { | |||
GE_CHECK_NOTNULL(buffer); | |||
if (ori_addr == nullptr) { | |||
@@ -65,6 +65,7 @@ class TaskContext { | |||
int64_t GetSessionId() const; | |||
uint64_t GetIterationNumber() const; | |||
void NodeDone(); | |||
void OnError(Status error); | |||
@@ -106,6 +107,9 @@ class TaskContext { | |||
uint32_t GetStreamId() const; | |||
void SetStreamId(uint32_t stream_id); | |||
void SetOverFlow(bool is_over_flow); | |||
bool IsOverFlow(); | |||
Status Synchronize(); | |||
bool IsForceInferShape() const; | |||
@@ -138,6 +142,7 @@ class TaskContext { | |||
uint32_t task_id_ = 0; | |||
uint32_t stream_id_ = 0; | |||
std::vector<TaskDescInfo> task_desc_info; | |||
bool is_over_flow_ = false; | |||
}; | |||
} // namespace hybrid | |||
} // namespace ge | |||
@@ -491,21 +491,18 @@ Status AiCpuBaseTask::UpdateOutputShape(vector<GeTensorDesc> &output_desc) { | |||
} | |||
GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape."); | |||
GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), | |||
aicpu_ext_handle_->GetExtInfoLen(), | |||
ext_info_addr_dev_, | |||
aicpu_ext_handle_->GetExtInfoLen(), | |||
RT_MEMCPY_DEVICE_TO_HOST)); | |||
GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_, | |||
aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST)); | |||
for (size_t i = 0; i < num_outputs_; ++i) { | |||
GeShape shape; | |||
DataType data_type; | |||
aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type); | |||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), | |||
"AiCpuCCTask Update [%zu]th output shape failed.", i); | |||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.", | |||
i); | |||
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | |||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), | |||
"AiCpuCCTask Update [%zu]th output desc failed.", i); | |||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuCCTask Update [%zu]th output desc failed.", | |||
i); | |||
} | |||
} | |||
GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished."); | |||
@@ -697,10 +694,10 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { | |||
const auto &shape_hbm = out_shape_hbm_[i]; | |||
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); | |||
std::unique_ptr<int64_t[]> shape_addr(new(std::nothrow) int64_t[dim_num]()); | |||
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]()); | |||
GE_CHECK_NOTNULL(shape_addr); | |||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, | |||
shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, | |||
result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||
for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { | |||
shape_dims.emplace_back(shape_addr[dim_idx]); | |||
@@ -711,13 +708,14 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { | |||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), | |||
"AiCpuTask update [%zu]th output shape failed.", i); | |||
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | |||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), | |||
"AiCpuTask update [%zu]th output desc failed.", i); | |||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuTask update [%zu]th output desc failed.", | |||
i); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, | |||
vector<DataBuffer> &outputs, | |||
rtStream_t stream) { | |||
@@ -431,3 +431,7 @@ rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) | |||
{ | |||
return RT_ERROR_NONE; | |||
} | |||
// Stub of rtDebugRegisterForStream: ignores every argument, leaves *streamId and *taskId
// untouched and always reports RT_ERROR_NONE.
rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, uint32_t *streamId,
                                   uint32_t *taskId)
{
  return RT_ERROR_NONE;
}
@@ -162,6 +162,7 @@ set(COMMON_SRC_FILES | |||
"${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" | |||
"${GE_CODE_DIR}/ge/common/helper/model_helper.cc" | |||
"${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | |||
"${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" | |||
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | |||
"${GE_CODE_DIR}/ge/model/ge_root_model.cc" | |||
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | |||
@@ -734,6 +735,7 @@ set(MULTI_PARTS_TEST_FILES | |||
"graph/transop_util_unittest.cc" | |||
"common/datatype_transfer_unittest.cc" | |||
"common/dump_manager_unittest.cc" | |||
"common/opdebug_register_unittest.cc" | |||
"common/format_transfer_unittest.cc" | |||
"common/format_transfer_transpose_unittest.cc" | |||
"common/format_transfer_nchw_5d_unittest.cc" | |||
@@ -0,0 +1,51 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#include "common/dump/opdebug_register.h" | |||
#include "common/debug/log.h" | |||
#include "common/ge_inner_error_codes.h" | |||
namespace ge { | |||
class UTEST_opdebug_register : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
}; | |||
// Registers op debug for a model with a fake non-null handle, unregisters it again and expects
// the registration to have reported SUCCESS.
TEST_F(UTEST_opdebug_register, register_debug_for_model_success) {
  OpdebugRegister debug_register;
  rtModel_t fake_model = reinterpret_cast<void *>(0x111);
  const uint32_t debug_mode = 1;
  DataDumper dumper;

  const auto status = debug_register.RegisterDebugForModel(fake_model, debug_mode, dumper);
  debug_register.UnregisterDebugForModel(fake_model);

  EXPECT_EQ(status, ge::SUCCESS);
}
// Registers op debug for a stream with a fake non-null handle, unregisters it again and expects
// the registration to have reported SUCCESS.
TEST_F(UTEST_opdebug_register, register_debug_for_stream_success) {
  OpdebugRegister debug_register;
  rtStream_t fake_stream = reinterpret_cast<void *>(0x111);
  const uint32_t debug_mode = 1;
  DataDumper dumper;

  const auto status = debug_register.RegisterDebugForStream(fake_stream, debug_mode, dumper);
  debug_register.UnregisterDebugForStream(fake_stream);

  EXPECT_EQ(status, ge::SUCCESS);
}
} // namespace ge |