Browse Source

!1188 dynamic shape over flow

From: @zhou_chao1993
Reviewed-by: @xchu42,@ji_chen
Signed-off-by: @ji_chen
tags/v1.2.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
1b845b9ac2
23 changed files with 393 additions and 126 deletions
  1. +2
    -0
      ge/CMakeLists.txt
  2. +6
    -2
      ge/common/dump/dump_manager.cc
  3. +3
    -3
      ge/common/dump/dump_op.cc
  4. +1
    -1
      ge/common/dump/dump_properties.h
  5. +148
    -0
      ge/common/dump/opdebug_register.cc
  6. +44
    -0
      ge/common/dump/opdebug_register.h
  7. +1
    -0
      ge/executor/CMakeLists.txt
  8. +18
    -35
      ge/graph/load/model_manager/data_dumper.h
  9. +9
    -65
      ge/graph/load/model_manager/davinci_model.cc
  10. +2
    -2
      ge/graph/load/model_manager/davinci_model.h
  11. +40
    -0
      ge/hybrid/executor/hybrid_model_async_executor.cc
  12. +7
    -0
      ge/hybrid/executor/hybrid_model_async_executor.h
  13. +3
    -3
      ge/hybrid/executor/worker/execution_engine.cc
  14. +4
    -0
      ge/hybrid/model/hybrid_model.h
  15. +21
    -0
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  16. +1
    -0
      ge/hybrid/node_executor/aicore/aicore_node_executor.h
  17. +1
    -1
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
  18. +8
    -0
      ge/hybrid/node_executor/task_context.cc
  19. +5
    -0
      ge/hybrid/node_executor/task_context.h
  20. +12
    -14
      ge/single_op/task/op_task.cc
  21. +4
    -0
      tests/depends/runtime/src/runtime_stub.cc
  22. +2
    -0
      tests/ut/ge/CMakeLists.txt
  23. +51
    -0
      tests/ut/ge/common/opdebug_register_unittest.cc

+ 2
- 0
ge/CMakeLists.txt View File

@@ -103,6 +103,7 @@ set(TRAIN_SRC_LIST
"common/profiling/profiling_manager.cc"
"common/dump/dump_manager.cc"
"common/dump/dump_properties.cc"
"common/dump/opdebug_register.cc"
"common/dump/dump_op.cc"
"common/profiling/ge_profiling.cc"
"common/profiling/ge_runner_profiling.cc"
@@ -427,6 +428,7 @@ set(INFER_SRC_LIST
"common/dump/dump_properties.cc"
"common/dump/dump_manager.cc"
"common/dump/dump_op.cc"
"common/dump/opdebug_register.cc"
"common/dump/dump_server.cc"
"common/helper/model_cache_helper.cc"
"ge_local_engine/engine/host_cpu_engine.cc"


+ 6
- 2
ge/common/dump/dump_manager.cc View File

@@ -104,8 +104,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties(
uint64_t session_id) {
std::lock_guard<std::mutex> lock(mutex_);
// If session_id is not found in dump_properties_map_, operator[] will insert one.
return dump_properties_map_[session_id];
auto iter = dump_properties_map_.find(session_id);
if (iter != dump_properties_map_.end()) {
return iter->second;
}
static DumpProperties default_properties;
return default_properties;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpManager::AddDumpProperties(


+ 3
- 3
ge/common/dump/dump_op.cc View File

@@ -219,9 +219,9 @@ Status DumpOp::LaunchDumpOp() {
op_mapping_info.set_dump_path(dump_path);
op_mapping_info.set_flag(kAicpuLoadFlag);
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
if (!dynamic_model_name_.empty()) {
op_mapping_info.set_model_id(dynamic_model_id_);
if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
op_mapping_info.set_model_name(dynamic_model_name_);
op_mapping_info.set_model_id(dynamic_model_id_);
}
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
@@ -253,7 +253,7 @@ Status DumpOp::LaunchDumpOp() {
}
op_mapping_info.mutable_task()->Add(std::move(task));
}
if (dump_properties_.GetDumpMode() == kDumpAll) {
if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) {
auto ret = DumpOutput(task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump output failed when in dumping all");


+ 1
- 1
ge/common/dump/dump_properties.h View File

@@ -81,11 +81,11 @@ class DumpProperties {

const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;}


private:
void CopyFrom(const DumpProperties &other);

void SetDumpDebugOptions();

std::string enable_dump_;
std::string enable_dump_debug_;



+ 148
- 0
ge/common/dump/opdebug_register.cc View File

@@ -0,0 +1,148 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "opdebug_register.h"

namespace {
// Size in bytes requested from rtMalloc for the op-debug buffer (op_debug_addr_).
constexpr size_t kOpDebugMemorySize = 2048UL;
// Size in bytes requested from rtMalloc for the buffer that stores the 64-bit
// address of the op-debug buffer (p2p_debug_addr_).
constexpr size_t kDebugP2pSize = 8UL;
}  // namespace
namespace ge {
// Empty body: the destructor does not free op_debug_addr_ / p2p_debug_addr_.
// Those buffers are released by UnregisterDebugForModel / UnregisterDebugForStream.
OpdebugRegister::~OpdebugRegister() {}

/// Registers op-debug (overflow detection) for a whole runtime model.
///
/// Allocates the debug buffers, registers them with the runtime through
/// rtDebugRegister and hands the resulting task/stream ids together with
/// p2p_debug_addr_ to the data dumper.
///
/// @param model_handle   runtime model to register op-debug on.
/// @param op_debug_mode  mode value forwarded unchanged to rtDebugRegister.
/// @param data_dumper    dumper that receives the op-debug task/stream ids.
/// @return SUCCESS on success; the allocation error or the converted runtime
///         error otherwise. If rtDebugRegister fails, the buffers allocated by
///         this call are released before returning.
Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for model in overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
    return ret;
  }
  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
  auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
    // Nothing was registered, so callers will not call Unregister for us.
    // Passing a null handle skips rtDebugUnRegister and only frees the buffers.
    UnregisterDebugForModel(nullptr);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}

/// Undoes RegisterDebugForModel on a best-effort basis.
///
/// When model_handle is non-null the runtime registration is removed first;
/// afterwards both debug buffers are freed and their pointers reset. Runtime
/// failures are only logged as warnings.
void OpdebugRegister::UnregisterDebugForModel(rtModel_t model_handle) {
  if (model_handle != nullptr) {
    GELOGD("start to call rtDebugUnRegister in model overflow.");
    const rtError_t unregister_ret = rtDebugUnRegister(model_handle);
    if (unregister_ret != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegister failed, ret: 0x%X", unregister_ret);
    }
  }

  // Release the op-debug buffer first, then the pointer-to-pointer buffer.
  void **const owned_buffers[] = {&op_debug_addr_, &p2p_debug_addr_};
  for (void **buffer : owned_buffers) {
    if (*buffer == nullptr) {
      continue;
    }
    const rtError_t free_ret = rtFree(*buffer);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    *buffer = nullptr;
  }
}

/// Registers op-debug (overflow detection) for a single stream.
///
/// Allocates the debug buffers, registers them with the runtime through
/// rtDebugRegisterForStream (only compiled when ONLY_COMPILE_OPEN_SRC is
/// defined) and hands the task/stream ids together with p2p_debug_addr_ to the
/// data dumper.
///
/// @param stream         stream to register op-debug on.
/// @param op_debug_mode  mode value forwarded unchanged to the runtime.
/// @param data_dumper    dumper that receives the op-debug task/stream ids.
/// @return SUCCESS on success; the allocation error or the converted runtime
///         error otherwise. If the runtime registration fails, the buffers
///         allocated by this call are released before returning.
Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper) {
  GELOGD("Start to register debug for stream in stream overflow");
  auto ret = MallocMemForOpdebug();
  if (ret != SUCCESS) {
    GELOGE(ret, "Malloc memory for opdebug in stream overflow ,ret:0x%X", ret);
    return ret;
  }

  uint32_t debug_stream_id = 0;
  uint32_t debug_task_id = 0;
#ifdef ONLY_COMPILE_OPEN_SRC
  auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
    // Nothing was registered, so callers will not call Unregister for us.
    // Passing a null stream skips the runtime unregister and only frees the buffers.
    UnregisterDebugForStream(nullptr);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
#endif
  GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
  data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
  return SUCCESS;
}

/// Undoes RegisterDebugForStream on a best-effort basis.
///
/// When ONLY_COMPILE_OPEN_SRC is defined and stream is non-null the runtime
/// registration is removed first; afterwards both debug buffers are freed and
/// their pointers reset. Runtime failures are only logged as warnings.
void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
#ifdef ONLY_COMPILE_OPEN_SRC
  if (stream != nullptr) {
    GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
    const rtError_t unregister_ret = rtDebugUnRegisterForStream(stream);
    if (unregister_ret != RT_ERROR_NONE) {
      GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", unregister_ret);
    }
  }
#endif

  // Release the op-debug buffer first, then the pointer-to-pointer buffer.
  void **const owned_buffers[] = {&op_debug_addr_, &p2p_debug_addr_};
  for (void **buffer : owned_buffers) {
    if (*buffer == nullptr) {
      continue;
    }
    const rtError_t free_ret = rtFree(*buffer);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    *buffer = nullptr;
  }
}

/// Allocates the two buffers used for op-debug.
///
/// op_debug_addr_ receives kOpDebugMemorySize bytes (RT_MEMORY_DDR).
/// p2p_debug_addr_ receives kDebugP2pSize bytes (RT_MEMORY_HBM) and is filled
/// with the 64-bit address of op_debug_addr_.
///
/// @return SUCCESS when both buffers are allocated and initialised; otherwise
///         the converted runtime error. On failure every buffer allocated by
///         this call is freed and its member pointer reset to nullptr, so no
///         memory is left behind for the caller to clean up.
Status OpdebugRegister::MallocMemForOpdebug() {
  rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));
  // For data dump, aicpu needs the pointer to pointer that save the real debug address.
  rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
    // Roll back the first allocation; callers do not unregister after a failed register.
    const rtError_t free_ret = rtFree(op_debug_addr_);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    op_debug_addr_ = nullptr;
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
    // Roll back both allocations, in the same order the Unregister functions use.
    rtError_t free_ret = rtFree(op_debug_addr_);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    op_debug_addr_ = nullptr;
    free_ret = rtFree(p2p_debug_addr_);
    if (free_ret != RT_ERROR_NONE) {
      GELOGW("rtFree failed, ret: 0x%X", free_ret);
    }
    p2p_debug_addr_ = nullptr;
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }

  return SUCCESS;
}

} // namespace ge

+ 44
- 0
ge/common/dump/opdebug_register.h View File

@@ -0,0 +1,44 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_COMMON_DUMP_OPDEBUG_REGISTER_H_
#define GE_COMMON_DUMP_OPDEBUG_REGISTER_H_

#include <map>
#include "common/debug/ge_log.h"
#include "common/debug/log.h"
#include "graph/load/model_manager/data_dumper.h"

namespace ge {
// Registers and unregisters op-debug with the runtime, either for a whole
// model or for a single stream, and holds the two buffers used for it.
class OpdebugRegister {
public:
OpdebugRegister() = default;
~OpdebugRegister();

// Allocates the debug buffers, calls rtDebugRegister on model_handle and saves
// the resulting task/stream ids and p2p_debug_addr_ into data_dumper.
Status RegisterDebugForModel(rtModel_t model_handle, uint32_t op_debug_mode, DataDumper &data_dumper);
// Calls rtDebugUnRegister when model_handle is non-null, then frees both buffers.
void UnregisterDebugForModel(rtModel_t model_handle);

// Stream counterpart of RegisterDebugForModel; the runtime call is only
// compiled when ONLY_COMPILE_OPEN_SRC is defined.
Status RegisterDebugForStream(rtStream_t stream, uint32_t op_debug_mode, DataDumper &data_dumper);
// Stream counterpart of UnregisterDebugForModel.
void UnregisterDebugForStream(rtStream_t stream);

private:
// Allocates op_debug_addr_ and p2p_debug_addr_ and copies the address of the
// former into the latter.
Status MallocMemForOpdebug();

// Buffer handed to the runtime debug-register calls.
void *op_debug_addr_ = nullptr;
// Buffer holding the 64-bit address of op_debug_addr_; passed to the data dumper.
void *p2p_debug_addr_ = nullptr;
};
} // namespace ge
#endif // GE_COMMON_DUMP_OPDEBUG_REGISTER_H_

+ 1
- 0
ge/executor/CMakeLists.txt View File

@@ -17,6 +17,7 @@ set(SRC_LIST
"../common/dump/dump_properties.cc"
"../common/dump/dump_manager.cc"
"../common/dump/dump_op.cc"
"../common/dump/opdebug_register.cc"
"../common/profiling/ge_profiling.cc"
"../graph/load/graph_loader.cc"
"../graph/execute/graph_execute.cc"


+ 18
- 35
ge/graph/load/model_manager/data_dumper.h View File

@@ -36,21 +36,9 @@
namespace ge {
class DataDumper {
public:
explicit DataDumper(const RuntimeParam &rsh)
: model_name_(),
model_id_(0),
runtime_param_(rsh),
dev_mem_load_(nullptr),
dev_mem_unload_(nullptr),
op_list_(),
input_map_(),
load_flag_(false),
device_id_(0),
global_step_(0),
loop_per_iter_(0),
loop_cond_(0),
compute_graph_(nullptr),
ref_info_() {}
DataDumper() : runtime_param_{} {}

explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {}

~DataDumper();

@@ -105,10 +93,10 @@ class DataDumper {
// for inference data dump
std::string om_name_;

uint32_t model_id_;
uint32_t model_id_ = 0;
const RuntimeParam &runtime_param_;
void *dev_mem_load_;
void *dev_mem_unload_;
void *dev_mem_load_ = nullptr;
void *dev_mem_unload_ = nullptr;

struct InnerDumpInfo;
struct InnerInputMapping;
@@ -119,16 +107,15 @@ class DataDumper {
uint32_t end_graph_stream_id_ = 0;
bool is_end_graph_ = false;
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init
bool load_flag_;
uint32_t device_id_;
uintptr_t global_step_;
uintptr_t loop_per_iter_;
uintptr_t loop_cond_;
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init
bool load_flag_ = false;
uint32_t device_id_ = 0;
uintptr_t global_step_ = 0;
uintptr_t loop_per_iter_ = 0;
uintptr_t loop_cond_ = 0;
ComputeGraphPtr compute_graph_ = nullptr; // release after DavinciModel::Init
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init
void *l1_fusion_addr_ = nullptr;


uint32_t op_debug_task_id_ = 0;
uint32_t op_debug_stream_id_ = 0;
void *op_debug_addr_ = nullptr;
@@ -144,20 +131,16 @@ class DataDumper {
Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i,
const std::string &node_name_index);
const std::string &node_name_index);
Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info);
void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
aicpu::dump::OpMappingInfo &op_mapping_info);
Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
Status GenerateInput(aicpu::dump::Input &input,
const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr,
size_t index);
Status GenerateOutput(aicpu::dump::Output &output,
const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr,
size_t index);
Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr, size_t index);
Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr, size_t index);
void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task);
};
struct DataDumper::InnerDumpInfo {


+ 9
- 65
ge/graph/load/model_manager/davinci_model.cc View File

@@ -232,6 +232,8 @@ DavinciModel::~DavinciModel() {

FreeP2PMem();

OpDebugUnRegister();

if (l1_fusion_addr_ != nullptr) {
GE_CHK_RT(rtFree(l1_fusion_addr_));
}
@@ -242,8 +244,6 @@ DavinciModel::~DavinciModel() {
}
}

OpDebugUnRegister();

ReleaseTask();
CleanTbeHandle();

@@ -568,77 +568,21 @@ Status DavinciModel::SetTSDevice() {
}

Status DavinciModel::OpDebugRegister() {
bool is_op_debug = false;
(void)ge::AttrUtils::GetBool(ge_model_, ATTR_OP_DEBUG_FLAG, is_op_debug);
GELOGD("The value of op debug in ge_model is %d.", is_op_debug);
if (is_op_debug) {
debug_reg_mutex_.lock();
rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

uint64_t debug_addrs_tmp = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr_));

// For data dump, aicpu needs the pointer to pointer that save the real debug address.
rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

uint32_t op_debug_mode = 0;
(void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode);
GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode);
uint32_t debug_task_id = 0;
uint32_t debug_stream_id = 0;
rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
if (GetDumpProperties().IsOpDebugOpen()) {
uint32_t op_debug_mode = GetDumpProperties().GetOpDebugMode();
auto ret = opdebug_register_.RegisterDebugForModel(rt_model_handle_, op_debug_mode, data_dumper_);
if (ret != SUCCESS) {
GELOGE(ret,"Register known shape op debug failed, ret: 0x%X",ret);
return ret;
}
GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id);
is_op_debug_reg_ = true;

data_dumper_.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, is_op_debug);
}

return SUCCESS;
}

void DavinciModel::OpDebugUnRegister() {
if (is_op_debug_reg_) {
debug_reg_mutex_.unlock();
rtError_t rt_ret = RT_ERROR_NONE;
if (rt_model_handle_ != nullptr) {
GELOGD("start call debug_unregister.");
rt_ret = rtDebugUnRegister(rt_model_handle_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret);
}
}

if (op_debug_addr_ != nullptr) {
rt_ret = rtFree(op_debug_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree failed, ret: 0x%X", rt_ret);
}
op_debug_addr_ = nullptr;
}

if (p2p_debug_addr_ != nullptr) {
rt_ret = rtFree(p2p_debug_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree failed, ret: 0x%X", rt_ret);
}
p2p_debug_addr_ = nullptr;
}
opdebug_register_.UnregisterDebugForModel(rt_model_handle_);
is_op_debug_reg_ = false;
}
return;


+ 2
- 2
ge/graph/load/model_manager/davinci_model.h View File

@@ -29,6 +29,7 @@
#include "common/helper/om_file_helper.h"
#include "common/opskernel/ge_task_info.h"
#include "common/properties_manager.h"
#include "common/dump/opdebug_register.h"
#include "common/types.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
@@ -984,6 +985,7 @@ class DavinciModel {
int64_t maxDumpOpNum_;
// for data dump
DataDumper data_dumper_;
OpdebugRegister opdebug_register_;
uint64_t iterator_count_;
bool is_l1_fusion_enable_;
map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init
@@ -1021,8 +1023,6 @@ class DavinciModel {
// for op debug
mutex debug_reg_mutex_;
bool is_op_debug_reg_ = false;
void *op_debug_addr_ = nullptr;
void *p2p_debug_addr_ = nullptr;
bool is_online_infer_dynamic_ = false;
bool is_getnext_sink_dynamic_ = false;
vector<int32_t> cur_dynamic_dims_;


+ 40
- 0
ge/hybrid/executor/hybrid_model_async_executor.cc View File

@@ -85,6 +85,10 @@ Status HybridModelAsyncExecutor::Stop() {
ret = future_.get();
}

if (is_op_debug_reg_) {
op_debug_register_.UnregisterDebugForStream(stream_);
}

if (stream_ != nullptr) {
GE_CHK_RT(rtStreamDestroy(stream_));
stream_ = nullptr;
@@ -101,6 +105,7 @@ Status HybridModelAsyncExecutor::Init() {
executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
GE_CHECK_NOTNULL(executor_);
GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine");

GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
@@ -508,5 +513,40 @@ Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<

return SUCCESS;
}
/// Enables op-debug for the hybrid executor's stream when it is switched on
/// in the execution context's dump properties.
///
/// When op-debug is open this registers stream_ with op_debug_register_,
/// configures data_dumper_ (dump properties, model name/id, device id and the
/// addresses of the global-step / loop variables, nullptr when a variable is
/// absent) and loads the dump info. When op-debug is not open it does nothing.
///
/// @return SUCCESS, or the failing status of the registration or LoadDumpInfo.
Status HybridModelAsyncExecutor::DumpOpDebug() {
  const DumpProperties &dump_properties = executor_->GetContext()->dump_properties;
  if (dump_properties.IsOpDebugOpen()) {
    GELOGD("Opdebug is open in hybrid engine");
    uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
    // RegisterDebugForStream returns a ge Status, not an rtError_t, so it must be
    // checked with the status macro rather than the runtime-error macro.
    GE_CHK_STATUS_RET(op_debug_register_.RegisterDebugForStream(stream_, op_debug_mode, data_dumper_),
                      "Register debug for stream failed in hybrid engine");
    is_op_debug_reg_ = true;
    data_dumper_.SetDumpProperties(dump_properties);
    data_dumper_.SetModelName(model_->GetModelName());
    data_dumper_.SetModelId(model_->GetModelId());
    data_dumper_.SetDeviceId(model_->GetDeviceId());

    void *global_step = nullptr;
    TensorValue *variable_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP);
    if (variable_global_step != nullptr) {
      global_step = const_cast<void *>(variable_global_step->GetData());
    }

    void *loop_per_iter = nullptr;
    TensorValue *variable_loop_per_iter = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
    if (variable_loop_per_iter != nullptr) {
      loop_per_iter = const_cast<void *>(variable_loop_per_iter->GetData());
    }

    void *loop_cond = nullptr;
    TensorValue *variable_loop_cond = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);
    if (variable_loop_cond != nullptr) {
      loop_cond = const_cast<void *>(variable_loop_cond->GetData());
    }
    data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
    GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
    GELOGD("Dump op debug SUCCESS in hybrid engine");
  }
  return SUCCESS;
}

} // namespace hybrid
} // namespace ge

+ 7
- 0
ge/hybrid/executor/hybrid_model_async_executor.h View File

@@ -21,7 +21,9 @@
#include <future>
#include "external/ge/ge_api_error_codes.h"
#include "external/ge/ge_api_types.h"
#include "common/dump/opdebug_register.h"
#include "graph/load/model_manager/data_inputer.h"
#include "graph/load/model_manager/data_dumper.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/executor/hybrid_model_pipeline_executor.h"
#include "runtime/stream.h"
@@ -77,6 +79,8 @@ class HybridModelAsyncExecutor {

Status PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args);

Status DumpOpDebug();

std::mutex mu_;
HybridModel *model_;
uint32_t device_id_ = 0U;
@@ -94,6 +98,9 @@ class HybridModelAsyncExecutor {
std::vector<bool> is_input_dynamic_;
std::shared_ptr<ModelListener> listener_;
string om_name_;
DataDumper data_dumper_;
bool is_op_debug_reg_ = false;
OpdebugRegister op_debug_register_;
};
} // namespace hybrid
} // namespace ge


+ 3
- 3
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -266,9 +266,9 @@ Status NodeDoneCallback::OnNodeDone() {
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Compute] End");
RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[Callback] Start");

auto dump_path = context_->GetDumpProperties().GetDumpPath();
if (!dump_path.empty()) {
GELOGI("Start to dump dynamic shape,dump_path is %s", dump_path.c_str());
const DumpProperties &dump_properties = context_->GetDumpProperties();
if (dump_properties.IsDumpOpen() || context_->IsOverFlow()) {
GELOGI("Start to dump dynamic shape op");
GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node");
}



+ 4
- 0
ge/hybrid/model/hybrid_model.h View File

@@ -61,6 +61,10 @@ class HybridModel {
device_id_ = device_id;
}

uint32_t GetDeviceId() {
return device_id_;
}

void SetModelId(uint32_t model_id) {
model_id_ = model_id;
}


+ 21
- 0
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -17,6 +17,7 @@
#include "aicore_node_executor.h"
#include "framework/common/taskdown_common.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "external/runtime/rt_error_codes.h"

namespace ge {
namespace hybrid {
@@ -189,6 +190,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
}
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
GE_CHK_STATUS_RET_NOLOG(CheckOverflow(context));
// save profiling data
uint32_t task_id = 0;
uint32_t stream_id = 0;
@@ -259,6 +261,25 @@ void AiCoreNodeTask::SetWorkspaceSizes(const vector<int64_t> &workspace_sizes) {
workspace_sizes_ = workspace_sizes;
}

/// Performs the op-debug overflow check for the kernel just launched.
///
/// Does nothing unless op-debug is open in the task's dump properties.
/// Otherwise the task's stream is synchronized: an
/// ACL_ERROR_RT_AICORE_OVER_FLOW result marks the context as overflowed and is
/// still reported as SUCCESS; any other runtime error is returned to the caller.
Status AiCoreNodeTask::CheckOverflow(TaskContext &context) {
  if (!context.GetDumpProperties().IsOpDebugOpen()) {
    GELOGD("Opdebug is not open in hybrid engine");
    return SUCCESS;
  }

  GELOGD("Op %s is doing overflow check in hybrid engine", context.GetNodeName());
  auto sync_ret = rtStreamSynchronize(context.GetStream());
  if (sync_ret == ACL_ERROR_RT_AICORE_OVER_FLOW) {
    // Overflow is recorded on the context rather than treated as a failure.
    context.SetOverFlow(true);
    GELOGW("Dynamic shape op %s is over flow", context.GetNodeName());
    return SUCCESS;
  }
  if (sync_ret != RT_ERROR_NONE) {
    GELOGE(sync_ret, "rtstreamsynchronize failed");
    return RT_ERROR_TO_GE_STATUS(sync_ret);
  }
  return SUCCESS;
}

TaskCompilerFactory &TaskCompilerFactory::GetInstance() {
static TaskCompilerFactory instance;
return instance;


+ 1
- 0
ge/hybrid/node_executor/aicore/aicore_node_executor.h View File

@@ -62,6 +62,7 @@ class AiCoreNodeTask : public NodeTask {
const vector<int64_t> &GetWorkspaceSizes() const;
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
private:
Status CheckOverflow(TaskContext &context);
std::vector<std::unique_ptr<AiCoreOpTask>> tasks_;
std::vector<int64_t> workspace_sizes_;
};


+ 1
- 1
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc View File

@@ -124,7 +124,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
}
if (!load_flag_) {
auto dump_properties = context.GetDumpProperties();
if (dump_properties.IsDumpOpen()) {
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
void *global_step = nullptr;
TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP);


+ 8
- 0
ge/hybrid/node_executor/task_context.cc View File

@@ -350,6 +350,14 @@ void TaskContext::SetStreamId(uint32_t stream_id) {
stream_id_ = stream_id;
}

// Stores the overflow flag for this task context.
void TaskContext::SetOverFlow(bool is_over_flow) {
is_over_flow_ = is_over_flow;
}

// Returns the overflow flag last stored by SetOverFlow.
bool TaskContext::IsOverFlow() {
return is_over_flow_;
}

Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
GE_CHECK_NOTNULL(buffer);
if (ori_addr == nullptr) {


+ 5
- 0
ge/hybrid/node_executor/task_context.h View File

@@ -65,6 +65,7 @@ class TaskContext {
int64_t GetSessionId() const;
uint64_t GetIterationNumber() const;


void NodeDone();
void OnError(Status error);

@@ -106,6 +107,9 @@ class TaskContext {
uint32_t GetStreamId() const;
void SetStreamId(uint32_t stream_id);

void SetOverFlow(bool is_over_flow);
bool IsOverFlow();

Status Synchronize();

bool IsForceInferShape() const;
@@ -138,6 +142,7 @@ class TaskContext {
uint32_t task_id_ = 0;
uint32_t stream_id_ = 0;
std::vector<TaskDescInfo> task_desc_info;
bool is_over_flow_ = false;
};
} // namespace hybrid
} // namespace ge


+ 12
- 14
ge/single_op/task/op_task.cc View File

@@ -491,21 +491,18 @@ Status AiCpuBaseTask::UpdateOutputShape(vector<GeTensorDesc> &output_desc) {
}
GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape.");

GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(),
aicpu_ext_handle_->GetExtInfoLen(),
ext_info_addr_dev_,
aicpu_ext_handle_->GetExtInfoLen(),
RT_MEMCPY_DEVICE_TO_HOST));
GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_,
aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST));

for (size_t i = 0; i < num_outputs_; ++i) {
GeShape shape;
DataType data_type;
aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type);
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]),
"AiCpuCCTask Update [%zu]th output shape failed.", i);
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.",
i);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
"AiCpuCCTask Update [%zu]th output desc failed.", i);
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuCCTask Update [%zu]th output desc failed.",
i);
}
}
GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished.");
@@ -697,10 +694,10 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
const auto &shape_hbm = out_shape_hbm_[i];

uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
std::unique_ptr<int64_t[]> shape_addr(new(std::nothrow) int64_t[dim_num]());
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]());
GE_CHECK_NOTNULL(shape_addr);
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size,
shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm,
result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST));

for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) {
shape_dims.emplace_back(shape_addr[dim_idx]);
@@ -711,13 +708,14 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]),
"AiCpuTask update [%zu]th output shape failed.", i);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]),
"AiCpuTask update [%zu]th output desc failed.", i);
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuTask update [%zu]th output desc failed.",
i);
}
}
return SUCCESS;
}


Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &outputs,
rtStream_t stream) {


+ 4
- 0
tests/depends/runtime/src/runtime_stub.cc View File

@@ -431,3 +431,7 @@ rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId)
{
return RT_ERROR_NONE;
}

// Test stub: ignores every argument, leaves *streamId / *taskId untouched and
// always reports success.
rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, uint32_t *streamId, uint32_t *taskId) {
return RT_ERROR_NONE;
}

+ 2
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -162,6 +162,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/dump/dump_properties.cc"
"${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
"${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc"
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/model/ge_root_model.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
@@ -734,6 +735,7 @@ set(MULTI_PARTS_TEST_FILES
"graph/transop_util_unittest.cc"
"common/datatype_transfer_unittest.cc"
"common/dump_manager_unittest.cc"
"common/opdebug_register_unittest.cc"
"common/format_transfer_unittest.cc"
"common/format_transfer_transpose_unittest.cc"
"common/format_transfer_nchw_5d_unittest.cc"


+ 51
- 0
tests/ut/ge/common/opdebug_register_unittest.cc View File

@@ -0,0 +1,51 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#include "common/dump/opdebug_register.h"
#include "common/debug/log.h"
#include "common/ge_inner_error_codes.h"

namespace ge {
class UTEST_opdebug_register : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};
// Registering op-debug for a model succeeds; the registration is undone before
// the result is checked.
TEST_F(UTEST_opdebug_register, register_debug_for_model_success) {
  OpdebugRegister debug_register;
  DataDumper dumper;
  rtModel_t fake_model = reinterpret_cast<void *>(0x111);
  uint32_t debug_mode = 1;
  auto status = debug_register.RegisterDebugForModel(fake_model, debug_mode, dumper);
  debug_register.UnregisterDebugForModel(fake_model);
  EXPECT_EQ(status, ge::SUCCESS);
}

// Registering op-debug for a stream succeeds; the registration is undone before
// the result is checked.
TEST_F(UTEST_opdebug_register, register_debug_for_stream_success) {
  OpdebugRegister debug_register;
  DataDumper dumper;
  rtStream_t fake_stream = reinterpret_cast<void *>(0x111);
  uint32_t debug_mode = 1;
  auto status = debug_register.RegisterDebugForStream(fake_stream, debug_mode, dumper);
  debug_register.UnregisterDebugForStream(fake_stream);
  EXPECT_EQ(status, ge::SUCCESS);
}


} // namespace ge

Loading…
Cancel
Save