From: @zhou_chao1993 Reviewed-by: @xchu42, @ji_chen Signed-off-by: @ji_chen tags/v1.2.0
@@ -103,6 +103,7 @@ set(TRAIN_SRC_LIST | |||||
"common/profiling/profiling_manager.cc" | "common/profiling/profiling_manager.cc" | ||||
"common/dump/dump_manager.cc" | "common/dump/dump_manager.cc" | ||||
"common/dump/dump_properties.cc" | "common/dump/dump_properties.cc" | ||||
"common/dump/opdebug_register.cc" | |||||
"common/dump/dump_op.cc" | "common/dump/dump_op.cc" | ||||
"common/profiling/ge_profiling.cc" | "common/profiling/ge_profiling.cc" | ||||
"common/profiling/ge_runner_profiling.cc" | "common/profiling/ge_runner_profiling.cc" | ||||
@@ -427,6 +428,7 @@ set(INFER_SRC_LIST | |||||
"common/dump/dump_properties.cc" | "common/dump/dump_properties.cc" | ||||
"common/dump/dump_manager.cc" | "common/dump/dump_manager.cc" | ||||
"common/dump/dump_op.cc" | "common/dump/dump_op.cc" | ||||
"common/dump/opdebug_register.cc" | |||||
"common/dump/dump_server.cc" | "common/dump/dump_server.cc" | ||||
"common/helper/model_cache_helper.cc" | "common/helper/model_cache_helper.cc" | ||||
"ge_local_engine/engine/host_cpu_engine.cc" | "ge_local_engine/engine/host_cpu_engine.cc" | ||||
@@ -104,8 +104,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf | |||||
// Returns the dump properties registered for session_id.
//
// The lookup uses find() rather than operator[] so that querying an unknown
// session does not insert a new entry into dump_properties_map_. When no entry
// exists, a reference to a function-local default-constructed DumpProperties
// is returned instead.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties(
    uint64_t session_id) {
  std::lock_guard<std::mutex> lock(mutex_);
  const auto iter = dump_properties_map_.find(session_id);
  if (iter != dump_properties_map_.end()) {
    return iter->second;
  }
  // Shared fallback for every session that has no registered properties.
  static DumpProperties default_properties;
  return default_properties;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties( | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties( | ||||
@@ -219,9 +219,9 @@ Status DumpOp::LaunchDumpOp() { | |||||
op_mapping_info.set_dump_path(dump_path); | op_mapping_info.set_dump_path(dump_path); | ||||
op_mapping_info.set_flag(kAicpuLoadFlag); | op_mapping_info.set_flag(kAicpuLoadFlag); | ||||
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | ||||
if (!dynamic_model_name_.empty()) { | |||||
op_mapping_info.set_model_id(dynamic_model_id_); | |||||
if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) { | |||||
op_mapping_info.set_model_name(dynamic_model_name_); | op_mapping_info.set_model_name(dynamic_model_name_); | ||||
op_mapping_info.set_model_id(dynamic_model_id_); | |||||
} | } | ||||
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | ||||
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | ||||
@@ -253,7 +253,7 @@ Status DumpOp::LaunchDumpOp() { | |||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
} | } | ||||
if (dump_properties_.GetDumpMode() == kDumpAll) { | |||||
if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) { | |||||
auto ret = DumpOutput(task); | auto ret = DumpOutput(task); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Dump output failed when in dumping all"); | GELOGE(ret, "Dump output failed when in dumping all"); | ||||
@@ -81,11 +81,11 @@ class DumpProperties { | |||||
const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;} | const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;} | ||||
private: | private: | ||||
void CopyFrom(const DumpProperties &other); | void CopyFrom(const DumpProperties &other); | ||||
void SetDumpDebugOptions(); | void SetDumpDebugOptions(); | ||||
std::string enable_dump_; | std::string enable_dump_; | ||||
std::string enable_dump_debug_; | std::string enable_dump_debug_; | ||||
@@ -0,0 +1,148 @@ | |||||
/** | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "opdebug_register.h" | |||||
namespace {
// Size in bytes requested for the op-debug device buffer.
constexpr size_t kOpDebugMemorySize = 2048UL;
// Size in bytes requested for the device buffer that stores the op-debug buffer address.
constexpr size_t kDebugP2pSize = 8UL;
}  // namespace
namespace ge { | |||||
// The destructor performs no cleanup of its own; the device buffers are
// released by UnregisterDebugForModel() / UnregisterDebugForStream().
OpdebugRegister::~OpdebugRegister() = default;
// Allocates the op-debug buffers and registers overflow debugging for a model.
//
// model_handle:  runtime model handle forwarded to rtDebugRegister.
// op_debug_mode: debug mode value forwarded to rtDebugRegister.
// data_dumper:   receives the debug task id, stream id and the p2p address on success.
//
// Returns SUCCESS on success; otherwise the allocation status or the converted
// runtime error. On every failure path the buffers allocated so far are freed,
// because callers only invoke UnregisterDebugForModel() after a successful
// registration.
Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for model in overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
    // A null handle skips rtDebugUnRegister and only frees any partially allocated buffers.
    UnregisterDebugForModel(nullptr);
    return ret;
  }
  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
  auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
    // Nothing was registered, so only the buffers need to be released.
    UnregisterDebugForModel(nullptr);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}
// Unregisters model-level op debug (when a handle is given) and frees both
// device buffers. Failures are logged as warnings and do not stop the cleanup.
void OpdebugRegister::UnregisterDebugForModel(rtModel_t model_handle) {
  if (model_handle != nullptr) {
    GELOGD("start to call rtDebugUnRegister in model overflow.");
    const rtError_t unregister_ret = rtDebugUnRegister(model_handle);
    if (unregister_ret != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegister failed, ret: 0x%X", unregister_ret);
    }
  }

  // Release the op-debug buffer first, then the buffer holding its address.
  void **const device_buffers[] = {&op_debug_addr_, &p2p_debug_addr_};
  for (void **buffer : device_buffers) {
    if (*buffer == nullptr) {
      continue;
    }
    const rtError_t free_ret = rtFree(*buffer);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    // Reset even when rtFree reports an error so the pointer is never freed twice.
    *buffer = nullptr;
  }
}
// Allocates the op-debug buffers and registers overflow debugging for a stream.
//
// stream:        runtime stream forwarded to rtDebugRegisterForStream.
// op_debug_mode: debug mode value forwarded to the runtime.
// data_dumper:   receives the debug task id, stream id and the p2p address on success.
//
// The runtime registration call is compiled only when ONLY_COMPILE_OPEN_SRC is
// defined; otherwise the ids passed to data_dumper stay 0.
//
// Returns SUCCESS on success; otherwise the allocation status or the converted
// runtime error. On every failure path the buffers allocated so far are freed,
// because callers only invoke UnregisterDebugForStream() after a successful
// registration.
Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for stream in stream overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in stream overflow failed ,ret:0x%X", ret);
    // A null stream skips the runtime unregister call and only frees any partially allocated buffers.
    UnregisterDebugForStream(nullptr);
    return ret;
  }

  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
#ifdef ONLY_COMPILE_OPEN_SRC
  auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
    // Nothing was registered, so only the buffers need to be released.
    UnregisterDebugForStream(nullptr);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
#endif
  GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}
// Unregisters stream-level op debug (when compiled in and a stream is given)
// and frees both device buffers. Failures are logged as warnings and do not
// stop the cleanup.
void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
#ifdef ONLY_COMPILE_OPEN_SRC
  if (stream != nullptr) {
    GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
    const rtError_t unregister_ret = rtDebugUnRegisterForStream(stream);
    if (unregister_ret != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", unregister_ret);
    }
  }
#endif

  // Release the op-debug buffer first, then the buffer holding its address.
  void **const device_buffers[] = {&op_debug_addr_, &p2p_debug_addr_};
  for (void **buffer : device_buffers) {
    if (*buffer == nullptr) {
      continue;
    }
    const rtError_t free_ret = rtFree(*buffer);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    // Reset even when rtFree reports an error so the pointer is never freed twice.
    *buffer = nullptr;
  }
}
// Allocates the two device buffers used for op debug:
//   - op_debug_addr_  (kOpDebugMemorySize bytes, RT_MEMORY_DDR)
//   - p2p_debug_addr_ (kDebugP2pSize bytes, RT_MEMORY_HBM), which is filled with
//     the address of op_debug_addr_.
//
// Returns SUCCESS when both buffers are allocated and the address has been
// copied; otherwise the converted runtime error. If a later step fails, the
// buffers allocated by the earlier steps are freed and the members reset to
// nullptr, so a failed call leaves no device memory behind.
Status OpdebugRegister::MallocMemForOpdebug() {
  rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
  // For data dump, aicpu needs the pointer to pointer that save the real debug address.
  rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
  if (rt_ret == RT_ERROR_NONE) {
    rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
    if (rt_ret == RT_ERROR_NONE) {
      return SUCCESS;
    }
    GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
  } else {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
  }

  // Failure after at least one successful allocation: release what was obtained.
  void **const device_buffers[] = {&op_debug_addr_, &p2p_debug_addr_};
  for (void **buffer : device_buffers) {
    if (*buffer == nullptr) {
      continue;
    }
    const rtError_t free_ret = rtFree(*buffer);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    *buffer = nullptr;
  }
  return RT_ERROR_TO_GE_STATUS(rt_ret);
}
} // namespace ge |
@@ -0,0 +1,44 @@ | |||||
/** | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ | |||||
#define GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ | |||||
#include <map> | |||||
#include "common/debug/ge_log.h" | |||||
#include "common/debug/log.h" | |||||
#include "graph/load/model_manager/data_dumper.h" | |||||
namespace ge { | |||||
class OpdebugRegister { | |||||
public: | |||||
OpdebugRegister() = default; | |||||
~OpdebugRegister(); | |||||
Status RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper); | |||||
void UnregisterDebugForModel(rtModel_t model_handle); | |||||
Status RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper); | |||||
void UnregisterDebugForStream(rtStream_t stream); | |||||
private: | |||||
Status MallocMemForOpdebug(); | |||||
void *op_debug_addr_ = nullptr; | |||||
void *p2p_debug_addr_ = nullptr; | |||||
}; | |||||
} // namespace ge | |||||
#endif // GE_COMMON_DUMP_OPDEBUG_REGISTER_H_ |
@@ -17,6 +17,7 @@ set(SRC_LIST | |||||
"../common/dump/dump_properties.cc" | "../common/dump/dump_properties.cc" | ||||
"../common/dump/dump_manager.cc" | "../common/dump/dump_manager.cc" | ||||
"../common/dump/dump_op.cc" | "../common/dump/dump_op.cc" | ||||
"../common/dump/opdebug_register.cc" | |||||
"../common/profiling/ge_profiling.cc" | "../common/profiling/ge_profiling.cc" | ||||
"../graph/load/graph_loader.cc" | "../graph/load/graph_loader.cc" | ||||
"../graph/execute/graph_execute.cc" | "../graph/execute/graph_execute.cc" | ||||
@@ -36,21 +36,9 @@ | |||||
namespace ge { | namespace ge { | ||||
class DataDumper { | class DataDumper { | ||||
public: | public: | ||||
explicit DataDumper(const RuntimeParam &rsh) | |||||
: model_name_(), | |||||
model_id_(0), | |||||
runtime_param_(rsh), | |||||
dev_mem_load_(nullptr), | |||||
dev_mem_unload_(nullptr), | |||||
op_list_(), | |||||
input_map_(), | |||||
load_flag_(false), | |||||
device_id_(0), | |||||
global_step_(0), | |||||
loop_per_iter_(0), | |||||
loop_cond_(0), | |||||
compute_graph_(nullptr), | |||||
ref_info_() {} | |||||
DataDumper() : runtime_param_{} {} | |||||
explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {} | |||||
~DataDumper(); | ~DataDumper(); | ||||
@@ -105,10 +93,10 @@ class DataDumper { | |||||
// for inference data dump | // for inference data dump | ||||
std::string om_name_; | std::string om_name_; | ||||
uint32_t model_id_; | |||||
uint32_t model_id_ = 0; | |||||
const RuntimeParam &runtime_param_; | const RuntimeParam &runtime_param_; | ||||
void *dev_mem_load_; | |||||
void *dev_mem_unload_; | |||||
void *dev_mem_load_ = nullptr; | |||||
void *dev_mem_unload_ = nullptr; | |||||
struct InnerDumpInfo; | struct InnerDumpInfo; | ||||
struct InnerInputMapping; | struct InnerInputMapping; | ||||
@@ -119,16 +107,15 @@ class DataDumper { | |||||
uint32_t end_graph_stream_id_ = 0; | uint32_t end_graph_stream_id_ = 0; | ||||
bool is_end_graph_ = false; | bool is_end_graph_ = false; | ||||
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init | std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init | ||||
bool load_flag_; | |||||
uint32_t device_id_; | |||||
uintptr_t global_step_; | |||||
uintptr_t loop_per_iter_; | |||||
uintptr_t loop_cond_; | |||||
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init | |||||
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||||
bool load_flag_ = false; | |||||
uint32_t device_id_ = 0; | |||||
uintptr_t global_step_ = 0; | |||||
uintptr_t loop_per_iter_ = 0; | |||||
uintptr_t loop_cond_ = 0; | |||||
ComputeGraphPtr compute_graph_ = nullptr; // release after DavinciModel::Init | |||||
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||||
void *l1_fusion_addr_ = nullptr; | void *l1_fusion_addr_ = nullptr; | ||||
uint32_t op_debug_task_id_ = 0; | uint32_t op_debug_task_id_ = 0; | ||||
uint32_t op_debug_stream_id_ = 0; | uint32_t op_debug_stream_id_ = 0; | ||||
void *op_debug_addr_ = nullptr; | void *op_debug_addr_ = nullptr; | ||||
@@ -144,20 +131,16 @@ class DataDumper { | |||||
Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | ||||
Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | ||||
Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, | Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, | ||||
const std::string &node_name_index); | |||||
const std::string &node_name_index); | |||||
Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | ||||
void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); | void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); | ||||
void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, | void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, | ||||
aicpu::dump::OpMappingInfo &op_mapping_info); | aicpu::dump::OpMappingInfo &op_mapping_info); | ||||
Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | ||||
Status GenerateInput(aicpu::dump::Input &input, | |||||
const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||||
const uintptr_t &addr, | |||||
size_t index); | |||||
Status GenerateOutput(aicpu::dump::Output &output, | |||||
const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||||
const uintptr_t &addr, | |||||
size_t index); | |||||
Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||||
const uintptr_t &addr, size_t index); | |||||
Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||||
const uintptr_t &addr, size_t index); | |||||
void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); | void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); | ||||
}; | }; | ||||
struct DataDumper::InnerDumpInfo { | struct DataDumper::InnerDumpInfo { | ||||
@@ -232,6 +232,8 @@ DavinciModel::~DavinciModel() { | |||||
FreeP2PMem(); | FreeP2PMem(); | ||||
OpDebugUnRegister(); | |||||
if (l1_fusion_addr_ != nullptr) { | if (l1_fusion_addr_ != nullptr) { | ||||
GE_CHK_RT(rtFree(l1_fusion_addr_)); | GE_CHK_RT(rtFree(l1_fusion_addr_)); | ||||
} | } | ||||
@@ -242,8 +244,6 @@ DavinciModel::~DavinciModel() { | |||||
} | } | ||||
} | } | ||||
OpDebugUnRegister(); | |||||
ReleaseTask(); | ReleaseTask(); | ||||
CleanTbeHandle(); | CleanTbeHandle(); | ||||
@@ -568,77 +568,21 @@ Status DavinciModel::SetTSDevice() { | |||||
} | } | ||||
Status DavinciModel::OpDebugRegister() { | Status DavinciModel::OpDebugRegister() { | ||||
bool is_op_debug = false; | |||||
(void)ge::AttrUtils::GetBool(ge_model_, ATTR_OP_DEBUG_FLAG, is_op_debug); | |||||
GELOGD("The value of op debug in ge_model is %d.", is_op_debug); | |||||
if (is_op_debug) { | |||||
debug_reg_mutex_.lock(); | |||||
rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_)); | |||||
// For data dump, aicpu needs the pointer to pointer that save the real debug address. | |||||
rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
uint32_t op_debug_mode = 0; | |||||
(void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode); | |||||
GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode); | |||||
uint32_t debug_task_id = 0; | |||||
uint32_t debug_stream_id = 0; | |||||
rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
if (GetDumpProperties().IsOpDebugOpen()) { | |||||
uint32_t op_debug_mode = GetDumpProperties().GetOpDebugMode(); | |||||
auto ret = opdebug_register_.RegisterDebugForModel(rt_model_handle_, op_debug_mode, data_dumper_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret,"Register known shape op debug failed, ret: 0x%X",ret); | |||||
return ret; | |||||
} | } | ||||
GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id); | |||||
is_op_debug_reg_ = true; | is_op_debug_reg_ = true; | ||||
data_dumper_.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, is_op_debug); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void DavinciModel::OpDebugUnRegister() { | void DavinciModel::OpDebugUnRegister() { | ||||
if (is_op_debug_reg_) { | if (is_op_debug_reg_) { | ||||
debug_reg_mutex_.unlock(); | |||||
rtError_t rt_ret = RT_ERROR_NONE; | |||||
if (rt_model_handle_ != nullptr) { | |||||
GELOGD("start call debug_unregister."); | |||||
rt_ret = rtDebugUnRegister(rt_model_handle_); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret); | |||||
} | |||||
} | |||||
if (op_debug_addr_ != nullptr) { | |||||
rt_ret = rtFree(op_debug_addr_); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGW("rtFree failed, ret: 0x%X", rt_ret); | |||||
} | |||||
op_debug_addr_ = nullptr; | |||||
} | |||||
if (p2p_debug_addr_ != nullptr) { | |||||
rt_ret = rtFree(p2p_debug_addr_); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGW("rtFree failed, ret: 0x%X", rt_ret); | |||||
} | |||||
p2p_debug_addr_ = nullptr; | |||||
} | |||||
opdebug_register_.UnregisterDebugForModel(rt_model_handle_); | |||||
is_op_debug_reg_ = false; | is_op_debug_reg_ = false; | ||||
} | } | ||||
return; | return; | ||||
@@ -29,6 +29,7 @@ | |||||
#include "common/helper/om_file_helper.h" | #include "common/helper/om_file_helper.h" | ||||
#include "common/opskernel/ge_task_info.h" | #include "common/opskernel/ge_task_info.h" | ||||
#include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
#include "common/dump/opdebug_register.h" | |||||
#include "common/types.h" | #include "common/types.h" | ||||
#include "framework/common/util.h" | #include "framework/common/util.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
@@ -984,6 +985,7 @@ class DavinciModel { | |||||
int64_t maxDumpOpNum_; | int64_t maxDumpOpNum_; | ||||
// for data dump | // for data dump | ||||
DataDumper data_dumper_; | DataDumper data_dumper_; | ||||
OpdebugRegister opdebug_register_; | |||||
uint64_t iterator_count_; | uint64_t iterator_count_; | ||||
bool is_l1_fusion_enable_; | bool is_l1_fusion_enable_; | ||||
map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init | map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init | ||||
@@ -1021,8 +1023,6 @@ class DavinciModel { | |||||
// for op debug | // for op debug | ||||
mutex debug_reg_mutex_; | mutex debug_reg_mutex_; | ||||
bool is_op_debug_reg_ = false; | bool is_op_debug_reg_ = false; | ||||
void *op_debug_addr_ = nullptr; | |||||
void *p2p_debug_addr_ = nullptr; | |||||
bool is_online_infer_dynamic_ = false; | bool is_online_infer_dynamic_ = false; | ||||
bool is_getnext_sink_dynamic_ = false; | bool is_getnext_sink_dynamic_ = false; | ||||
vector<int32_t> cur_dynamic_dims_; | vector<int32_t> cur_dynamic_dims_; | ||||
@@ -85,6 +85,10 @@ Status HybridModelAsyncExecutor::Stop() { | |||||
ret = future_.get(); | ret = future_.get(); | ||||
} | } | ||||
if (is_op_debug_reg_) { | |||||
op_debug_register_.UnregisterDebugForStream(stream_); | |||||
} | |||||
if (stream_ != nullptr) { | if (stream_ != nullptr) { | ||||
GE_CHK_RT(rtStreamDestroy(stream_)); | GE_CHK_RT(rtStreamDestroy(stream_)); | ||||
stream_ = nullptr; | stream_ = nullptr; | ||||
@@ -101,6 +105,7 @@ Status HybridModelAsyncExecutor::Init() { | |||||
executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); | executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); | ||||
GE_CHECK_NOTNULL(executor_); | GE_CHECK_NOTNULL(executor_); | ||||
GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); | GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); | ||||
GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine"); | |||||
GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups()); | GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups()); | ||||
if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) { | if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) { | ||||
@@ -508,5 +513,40 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector< | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Registers stream-level op debug and loads the dump info when op debug is
// enabled in the executor context's dump properties; otherwise does nothing.
//
// Returns SUCCESS when op debug is disabled or everything succeeded; otherwise
// the failing status from registration or from DataDumper::LoadDumpInfo().
Status HybridModelAsyncExecutor::DumpOpDebug() {
  const DumpProperties &dump_properties = executor_->GetContext()->dump_properties;
  if (!dump_properties.IsOpDebugOpen()) {
    return SUCCESS;
  }

  GELOGD("Opdebug is open in hybrid engine");
  const uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
  // RegisterDebugForStream returns a GE Status, so it is checked with the status
  // macro rather than the runtime-error macro, which would re-map the code as
  // if it were an rtError_t.
  GE_CHK_STATUS_RET(op_debug_register_.RegisterDebugForStream(stream_, op_debug_mode, data_dumper_),
                    "Register op debug for stream failed in hybrid engine");
  is_op_debug_reg_ = true;

  data_dumper_.SetDumpProperties(dump_properties);
  data_dumper_.SetModelName(model_->GetModelName());
  data_dumper_.SetModelId(model_->GetModelId());
  data_dumper_.SetDeviceId(model_->GetDeviceId());

  // Looks up a model variable by name; yields its data address, or nullptr when the variable is absent.
  const auto variable_addr = [this](const std::string &variable_name) -> void * {
    TensorValue *variable = model_->GetVariable(variable_name);
    return (variable != nullptr) ? const_cast<void *>(variable->GetData()) : nullptr;
  };
  void *global_step = variable_addr(NODE_NAME_GLOBAL_STEP);
  void *loop_per_iter = variable_addr(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
  void *loop_cond = variable_addr(NODE_NAME_FLOWCTRL_LOOP_COND);
  data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);

  GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
  GELOGD("Dump op debug SUCCESS in hybrid engine");
  return SUCCESS;
}
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -21,7 +21,9 @@ | |||||
#include <future> | #include <future> | ||||
#include "external/ge/ge_api_error_codes.h" | #include "external/ge/ge_api_error_codes.h" | ||||
#include "external/ge/ge_api_types.h" | #include "external/ge/ge_api_types.h" | ||||
#include "common/dump/opdebug_register.h" | |||||
#include "graph/load/model_manager/data_inputer.h" | #include "graph/load/model_manager/data_inputer.h" | ||||
#include "graph/load/model_manager/data_dumper.h" | |||||
#include "hybrid/executor/hybrid_model_executor.h" | #include "hybrid/executor/hybrid_model_executor.h" | ||||
#include "hybrid/executor/hybrid_model_pipeline_executor.h" | #include "hybrid/executor/hybrid_model_pipeline_executor.h" | ||||
#include "runtime/stream.h" | #include "runtime/stream.h" | ||||
@@ -77,6 +79,8 @@ class HybridModelAsyncExecutor { | |||||
Status PrepareInputs(const InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); | Status PrepareInputs(const InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); | ||||
Status DumpOpDebug(); | |||||
std::mutex mu_; | std::mutex mu_; | ||||
HybridModel *model_; | HybridModel *model_; | ||||
uint32_t device_id_ = 0U; | uint32_t device_id_ = 0U; | ||||
@@ -94,6 +98,9 @@ class HybridModelAsyncExecutor { | |||||
std::vector<bool> is_input_dynamic_; | std::vector<bool> is_input_dynamic_; | ||||
std::shared_ptr<ModelListener> listener_; | std::shared_ptr<ModelListener> listener_; | ||||
string om_name_; | string om_name_; | ||||
DataDumper data_dumper_; | |||||
bool is_op_debug_reg_ = false; | |||||
OpdebugRegister op_debug_register_; | |||||
}; | }; | ||||
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge | ||||
@@ -266,9 +266,9 @@ Status NodeDoneCallback::OnNodeDone() { | |||||
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Compute] End"); | RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Compute] End"); | ||||
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Callback] Start"); | RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Callback] Start"); | ||||
auto dump_path = context_->GetDumpProperties().GetDumpPath(); | |||||
if (!dump_path.empty()) { | |||||
GELOGI("Start to dump dynamic shape,dump_path is %s", dump_path.c_str()); | |||||
const DumpProperties &dump_properties = context_->GetDumpProperties(); | |||||
if (dump_properties.IsDumpOpen() || context_->IsOverFlow()) { | |||||
GELOGI("Start to dump dynamic shape op"); | |||||
GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node"); | GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node"); | ||||
} | } | ||||
@@ -61,6 +61,10 @@ class HybridModel { | |||||
device_id_ = device_id; | device_id_ = device_id; | ||||
} | } | ||||
uint32_t GetDeviceId() { | |||||
return device_id_; | |||||
} | |||||
void SetModelId(uint32_t model_id) { | void SetModelId(uint32_t model_id) { | ||||
model_id_ = model_id; | model_id_ = model_id; | ||||
} | } | ||||
@@ -17,6 +17,7 @@ | |||||
#include "aicore_node_executor.h" | #include "aicore_node_executor.h" | ||||
#include "framework/common/taskdown_common.h" | #include "framework/common/taskdown_common.h" | ||||
#include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
#include "external/runtime/rt_error_codes.h" | |||||
namespace ge { | namespace ge { | ||||
namespace hybrid { | namespace hybrid { | ||||
@@ -189,6 +190,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
} | } | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); | ||||
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); | ||||
GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context)); | |||||
// save profiling data | // save profiling data | ||||
uint32_t task_id = 0; | uint32_t task_id = 0; | ||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
@@ -259,6 +261,25 @@ void AiCoreNodeTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) { | |||||
workspace_sizes_ = workspace_sizes; | workspace_sizes_ = workspace_sizes; | ||||
} | } | ||||
// When op debug is enabled, synchronizes the task's stream and records an
// AI Core overflow on the task context.
//
// Returns SUCCESS when op debug is disabled, when synchronization succeeds, or
// when it reports an overflow (the context is flagged); any other
// synchronization error is returned as the converted runtime status.
Status AiCoreNodeTask::CheckOverflow(TaskContext &context) {
  if (!context.GetDumpProperties().IsOpDebugOpen()) {
    GELOGD("Opdebug is not open in hybrid engine");
    return SUCCESS;
  }

  GELOGD("Op %s is doing overflow check in hybrid engine", context.GetNodeName());
  const auto sync_ret = rtStreamSynchronize(context.GetStream());
  if (sync_ret == ACL_ERROR_RT_AICORE_OVER_FLOW) {
    // Overflow is treated as a reportable condition, not a failure.
    context.SetOverFlow(true);
    GELOGW("Dynamic shape op %s is over flow", context.GetNodeName());
    return SUCCESS;
  }
  if (sync_ret != RT_ERROR_NONE) {
    GELOGE(sync_ret, "rtstreamsynchronize failed");
    return RT_ERROR_TO_GE_STATUS(sync_ret);
  }
  return SUCCESS;
}
TaskCompilerFactory &TaskCompilerFactory::GetInstance() { | TaskCompilerFactory &TaskCompilerFactory::GetInstance() { | ||||
static TaskCompilerFactory instance; | static TaskCompilerFactory instance; | ||||
return instance; | return instance; | ||||
@@ -62,6 +62,7 @@ class AiCoreNodeTask : public NodeTask { | |||||
const vector<int64_t> &GetWorkspaceSizes() const; | const vector<int64_t> &GetWorkspaceSizes() const; | ||||
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | ||||
private: | private: | ||||
Status CheckOverflow(TaskContext &context); | |||||
std::vector<std::unique_ptr<AiCoreOpTask>> tasks_; | std::vector<std::unique_ptr<AiCoreOpTask>> tasks_; | ||||
std::vector<int64_t> workspace_sizes_; | std::vector<int64_t> workspace_sizes_; | ||||
}; | }; | ||||
@@ -124,7 +124,7 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
} | } | ||||
if (!load_flag_) { | if (!load_flag_) { | ||||
auto dump_properties = context.GetDumpProperties(); | auto dump_properties = context.GetDumpProperties(); | ||||
if (dump_properties.IsDumpOpen()) { | |||||
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { | |||||
davinci_model_->SetDumpProperties(dump_properties); | davinci_model_->SetDumpProperties(dump_properties); | ||||
void *global_step = nullptr; | void *global_step = nullptr; | ||||
TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP); | TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP); | ||||
@@ -350,6 +350,14 @@ void TaskContext::SetStreamId(uint32_t stream_id) { | |||||
stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
} | } | ||||
// Stores the given overflow flag on this task context (is_over_flow_). | |||||
// @param is_over_flow  new value of the flag. | |||||
void TaskContext::SetOverFlow(bool is_over_flow) { | |||||
is_over_flow_ = is_over_flow; | |||||
} | |||||
// Returns the overflow flag currently stored on this task context (is_over_flow_). | |||||
// NOTE(review): this accessor does not modify state and could be declared const together | |||||
// with its declaration in the class - confirm no caller depends on the non-const form. | |||||
bool TaskContext::IsOverFlow() { | |||||
return is_over_flow_; | |||||
} | |||||
Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) { | Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) { | ||||
GE_CHECK_NOTNULL(buffer); | GE_CHECK_NOTNULL(buffer); | ||||
if (ori_addr == nullptr) { | if (ori_addr == nullptr) { | ||||
@@ -65,6 +65,7 @@ class TaskContext { | |||||
int64_t GetSessionId() const; | int64_t GetSessionId() const; | ||||
uint64_t GetIterationNumber() const; | uint64_t GetIterationNumber() const; | ||||
void NodeDone(); | void NodeDone(); | ||||
void OnError(Status error); | void OnError(Status error); | ||||
@@ -106,6 +107,9 @@ class TaskContext { | |||||
uint32_t GetStreamId() const; | uint32_t GetStreamId() const; | ||||
void SetStreamId(uint32_t stream_id); | void SetStreamId(uint32_t stream_id); | ||||
void SetOverFlow(bool is_over_flow); | |||||
bool IsOverFlow(); | |||||
Status Synchronize(); | Status Synchronize(); | ||||
bool IsForceInferShape() const; | bool IsForceInferShape() const; | ||||
@@ -138,6 +142,7 @@ class TaskContext { | |||||
uint32_t task_id_ = 0; | uint32_t task_id_ = 0; | ||||
uint32_t stream_id_ = 0; | uint32_t stream_id_ = 0; | ||||
std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
bool is_over_flow_ = false; | |||||
}; | }; | ||||
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge | ||||
@@ -491,21 +491,18 @@ Status AiCpuBaseTask::UpdateOutputShape(vector<GeTensorDesc> &output_desc) { | |||||
} | } | ||||
GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape."); | GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape."); | ||||
GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), | |||||
aicpu_ext_handle_->GetExtInfoLen(), | |||||
ext_info_addr_dev_, | |||||
aicpu_ext_handle_->GetExtInfoLen(), | |||||
RT_MEMCPY_DEVICE_TO_HOST)); | |||||
GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_, | |||||
aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST)); | |||||
for (size_t i = 0; i < num_outputs_; ++i) { | for (size_t i = 0; i < num_outputs_; ++i) { | ||||
GeShape shape; | GeShape shape; | ||||
DataType data_type; | DataType data_type; | ||||
aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type); | aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type); | ||||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), | |||||
"AiCpuCCTask Update [%zu]th output shape failed.", i); | |||||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.", | |||||
i); | |||||
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | ||||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), | |||||
"AiCpuCCTask Update [%zu]th output desc failed.", i); | |||||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuCCTask Update [%zu]th output desc failed.", | |||||
i); | |||||
} | } | ||||
} | } | ||||
GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished."); | GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished."); | ||||
@@ -697,10 +694,10 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { | |||||
const auto &shape_hbm = out_shape_hbm_[i]; | const auto &shape_hbm = out_shape_hbm_[i]; | ||||
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); | uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); | ||||
std::unique_ptr<int64_t[]> shape_addr(new(std::nothrow) int64_t[dim_num]()); | |||||
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]()); | |||||
GE_CHECK_NOTNULL(shape_addr); | GE_CHECK_NOTNULL(shape_addr); | ||||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, | |||||
shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, | |||||
result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||||
for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { | for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { | ||||
shape_dims.emplace_back(shape_addr[dim_idx]); | shape_dims.emplace_back(shape_addr[dim_idx]); | ||||
@@ -711,13 +708,14 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { | |||||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), | GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), | ||||
"AiCpuTask update [%zu]th output shape failed.", i); | "AiCpuTask update [%zu]th output shape failed.", i); | ||||
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) { | ||||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), | |||||
"AiCpuTask update [%zu]th output desc failed.", i); | |||||
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuTask update [%zu]th output desc failed.", | |||||
i); | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, | Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, | ||||
vector<DataBuffer> &outputs, | vector<DataBuffer> &outputs, | ||||
rtStream_t stream) { | rtStream_t stream) { | ||||
@@ -431,3 +431,7 @@ rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId) | |||||
{ | { | ||||
return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
} | } | ||||
// Stub implementation: none of the arguments are read or written, and the call always | |||||
// reports success by returning RT_ERROR_NONE. | |||||
// NOTE(review): presumably stands in for the real runtime API in test builds - confirm. | |||||
// Callers must not expect *streamId or *taskId to be filled in by this stub. | |||||
rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId) { | |||||
return RT_ERROR_NONE; | |||||
} |
@@ -162,6 +162,7 @@ set(COMMON_SRC_FILES | |||||
"${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" | "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc" | ||||
"${GE_CODE_DIR}/ge/common/helper/model_helper.cc" | "${GE_CODE_DIR}/ge/common/helper/model_helper.cc" | ||||
"${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" | ||||
"${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" | |||||
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" | ||||
"${GE_CODE_DIR}/ge/model/ge_root_model.cc" | "${GE_CODE_DIR}/ge/model/ge_root_model.cc" | ||||
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" | ||||
@@ -734,6 +735,7 @@ set(MULTI_PARTS_TEST_FILES | |||||
"graph/transop_util_unittest.cc" | "graph/transop_util_unittest.cc" | ||||
"common/datatype_transfer_unittest.cc" | "common/datatype_transfer_unittest.cc" | ||||
"common/dump_manager_unittest.cc" | "common/dump_manager_unittest.cc" | ||||
"common/opdebug_register_unittest.cc" | |||||
"common/format_transfer_unittest.cc" | "common/format_transfer_unittest.cc" | ||||
"common/format_transfer_transpose_unittest.cc" | "common/format_transfer_transpose_unittest.cc" | ||||
"common/format_transfer_nchw_5d_unittest.cc" | "common/format_transfer_nchw_5d_unittest.cc" | ||||
@@ -0,0 +1,51 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gtest/gtest.h> | |||||
#include "common/dump/opdebug_register.h" | |||||
#include "common/debug/log.h" | |||||
#include "common/ge_inner_error_codes.h" | |||||
namespace ge { | |||||
// Test fixture for the OpdebugRegister unit tests. | |||||
// SetUp and TearDown are intentionally empty: each test builds its own objects. | |||||
class UTEST_opdebug_register : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() {} | |||||
}; | |||||
// Registers op debug for a model handle, unregisters it again, and expects the | |||||
// registration call to have returned ge::SUCCESS. | |||||
TEST_F(UTEST_opdebug_register, register_debug_for_model_success) { | |||||
OpdebugRegister opdebug_register; | |||||
// Arbitrary non-null placeholder handle; it is only passed through to the calls below. | |||||
// NOTE(review): presumably only valid against the stubbed runtime - confirm. | |||||
rtModel_t model_handle = (void*)0x111; | |||||
uint32_t op_debug_mode = 1; | |||||
DataDumper data_dumper; | |||||
auto ret = opdebug_register.RegisterDebugForModel(model_handle, op_debug_mode, data_dumper); | |||||
// Unregister before asserting, so the unregister call is made regardless of the result. | |||||
opdebug_register.UnregisterDebugForModel(model_handle); | |||||
EXPECT_EQ(ret, ge::SUCCESS); | |||||
} | |||||
// Registers op debug for a stream, unregisters it again, and expects the | |||||
// registration call to have returned ge::SUCCESS. | |||||
TEST_F(UTEST_opdebug_register, register_debug_for_stream_success) { | |||||
OpdebugRegister opdebug_register; | |||||
// Arbitrary non-null placeholder stream; it is only passed through to the calls below. | |||||
// NOTE(review): presumably only valid against the stubbed runtime - confirm. | |||||
rtStream_t stream = (void*)0x111; | |||||
uint32_t op_debug_mode = 1; | |||||
DataDumper data_dumper; | |||||
auto ret = opdebug_register.RegisterDebugForStream(stream, op_debug_mode, data_dumper); | |||||
// Unregister before asserting, so the unregister call is made regardless of the result. | |||||
opdebug_register.UnregisterDebugForStream(stream); | |||||
EXPECT_EQ(ret, ge::SUCCESS); | |||||
} | |||||
} // namespace ge |