@@ -108,6 +108,7 @@ set(TRAIN_SRC_LIST
     "common/helper/model_cache_helper.cc"
     "common/profiling/profiling_manager.cc"
     "common/dump/dump_manager.cc"
+    "common/dump/exception_dumper.cc"
     "common/dump/dump_properties.cc"
     "common/dump/opdebug_register.cc"
     "common/dump/dump_op.cc"
@@ -437,6 +438,7 @@ set(INFER_SRC_LIST
     "common/formats/formats.cc"
     "common/profiling/profiling_manager.cc"
     "common/dump/dump_properties.cc"
+    "common/dump/exception_dumper.cc"
    "common/dump/dump_manager.cc"
     "common/dump/dump_op.cc"
     "common/dump/opdebug_register.cc"
@@ -161,7 +161,7 @@ int MemoryDumper::OpenFile(const char *filename) {
   // Using the O_EXCL, if the file already exists,return failed to avoid privilege escalation vulnerability.
   mmMode_t mode = M_IRUSR | M_IWUSR;
-  int32_t fd = mmOpen2(real_path.c_str(), M_RDWR | M_CREAT | O_TRUNC, mode);
+  int32_t fd = mmOpen2(real_path.c_str(), M_RDWR | M_CREAT | M_APPEND, mode);
   if (fd == EN_ERROR || fd == EN_INVALID_PARAM) {
     GELOGE(kInvalidFd, "[Open][File]Failed. errno = %d, error:%s, filename:%s.",
            fd, strerror(errno), filename);
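The open mode changes from truncate to append because the exception-dump path now writes one file in several passes: a proto length header, the serialized proto body, and then the raw input/output buffers (see `ExceptionDumper::DumpExceptionInfo` below). A minimal sketch of that write sequence, using only calls that appear in this diff; `dump_file_path`, `dump_data`, and `proto_msg` stand for the locals of `DumpExceptionInfo`:

```cpp
// With M_APPEND, each DumpToFile call reopens the same file and appends its record;
// with the old O_TRUNC flag the later calls would have wiped out the earlier records.
uint64_t proto_size = dump_data.ByteSizeLong();
GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), &proto_size, sizeof(uint64_t)),
                  "Failed to dump proto size");            // record 1: proto length
GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), proto_msg.get(), proto_size),
                  "Failed to dump proto msg");             // record 2: serialized DumpData
// records 3..n: device buffers appended by DumpExceptionInput / DumpExceptionOutput
```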
@@ -0,0 +1,241 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/dump/exception_dumper.h"
+
+#include "common/ge/datatype_util.h"
+#include "common/debug/memory_dumper.h"
+#include "framework/common/debug/log.h"
+#include "graph/manager/util/debug.h"
+#include "graph/utils/tensor_utils.h"
+#include "graph/load/model_manager/model_utils.h"
+#include "proto/dump_task.pb.h"
+
+namespace {
+static uint64_t GetNowTime() {
+  uint64_t ret = 0;
+  mmTimeval tv;
+  if (mmGetTimeOfDay(&tv, nullptr) == 0) {
+    ret = tv.tv_sec * 1000000ULL + tv.tv_usec;
+  }
+  return ret;
+}
+
+static void ReplaceStringElem(std::string &str) {
+  for_each(str.begin(), str.end(), [](char &ch) {
+    if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) {
+      ch = '_';
+    }
+  });
+}
+
+static void SetDumpData(const ge::OpDescInfo &op_desc_info, toolkit::dumpdata::DumpData &dump_data) {
+  dump_data.set_version("2.0");
+  dump_data.set_dump_time(GetNowTime());
+  dump_data.set_op_name(op_desc_info.op_name);
+  for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
+    toolkit::dumpdata::OpInput input;
+    input.set_data_type(toolkit::dumpdata::OutputDataType(
+        ge::DataTypeUtil::GetIrDataType(op_desc_info.input_data_type[i])));
+    input.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.input_format[i]));
+    for (auto dim : op_desc_info.input_shape[i]) {
+      input.mutable_shape()->add_dim(dim);
+    }
+    input.set_size(op_desc_info.input_size[i]);
+    GELOGI("[Set][DumpData] The input size int exception is %ld", op_desc_info.input_size[i]);
+    dump_data.mutable_input()->Add(std::move(input));
+  }
+
+  for (size_t j = 0; j < op_desc_info.output_format.size(); ++j) {
+    toolkit::dumpdata::OpOutput output;
+    output.set_data_type(toolkit::dumpdata::OutputDataType(
+        ge::DataTypeUtil::GetIrDataType(op_desc_info.output_data_type[j])));
+    output.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.output_format[j]));
+    for (auto dim : op_desc_info.output_shape[j]) {
+      output.mutable_shape()->add_dim(dim);
+    }
+    output.set_size(op_desc_info.output_size[j]);
+    GELOGI("[Set][DumpData] The output size int exception is %ld", op_desc_info.output_size[j]);
+    dump_data.mutable_output()->Add(std::move(output));
+  }
+}
+}  // namespace
+
+namespace ge {
+ExceptionDumper::~ExceptionDumper() {}
+
+void ExceptionDumper::SaveDumpOpInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id,
+                                     vector<void *> &input_addrs, vector<void *> &output_addrs) {
+  OpDescInfo op_desc_info;
+  SaveOpDescInfo(op, task_id, stream_id, op_desc_info);
+  op_desc_info.input_addrs = input_addrs;
+  op_desc_info.output_addrs = output_addrs;
+  op_desc_info_.emplace_back(std::move(op_desc_info));
+}
+
+void ExceptionDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op,
+                                     uint32_t task_id, uint32_t stream_id) {
+  OpDescInfo op_desc_info;
+  SaveOpDescInfo(op, task_id, stream_id, op_desc_info);
+  op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op);
+  op_desc_info.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op);
+  op_desc_info_.emplace_back(std::move(op_desc_info));
+}
+
+void ExceptionDumper::SaveOpDescInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id,
+                                     OpDescInfo &op_desc_info) {
+  if (op == nullptr) {
+    GELOGW("[Save][OpExceptionInfo] op desc ptr is null.");
+    return;
+  }
+  GELOGD("[Save][OpExceptionInfo] Start to save dump op [%s] info of task_id: %u, stream_id: %u",
+         op->GetName().c_str(), task_id, stream_id);
+  op_desc_info.op_name = op->GetName();
+  op_desc_info.op_type = op->GetType();
+  op_desc_info.task_id = task_id;
+  op_desc_info.stream_id = stream_id;
+  for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
+    GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
+    if (input_tensor_desc == nullptr) {
+      continue;
+    }
+    op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat());
+    op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
+    op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType());
+    int64_t input_size = 0;
+    if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) {
+      GELOGW("[Save][OpExceptionInfo] Op [%s] get input size failed.", op->GetName().c_str());
+      return;
+    }
+    GELOGD("[Save][OpExceptionInfo] Save dump op info, the input size is %ld", input_size);
+    op_desc_info.input_size.emplace_back(input_size);
+  }
+  for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
+    GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
+    if (output_tensor_desc == nullptr) {
+      continue;
+    }
+    op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat());
+    op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
+    op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType());
+    int64_t output_size = 0;
+    if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) {
+      GELOGW("[Save][OpExceptionInfo] Op [%s] get output size failed.", op->GetName().c_str());
+      return;
+    }
+    GELOGD("[Save][OpExceptionInfo] Save dump op info, the output size is %ld.", output_size);
+    op_desc_info.output_size.emplace_back(output_size);
+  }
+}
+
+Status ExceptionDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const {
+  GELOGI("[Dump][Exception] Start to dump exception info");
+  for (const rtExceptionInfo &iter : exception_infos) {
+    OpDescInfo op_desc_info;
+    if (GetOpDescInfo(iter.streamid, iter.taskid, op_desc_info)) {
+      toolkit::dumpdata::DumpData dump_data;
+      SetDumpData(op_desc_info, dump_data);
+      uint64_t now_time = GetNowTime();
+
+      std::string op_name = op_desc_info.op_name;
+      std::string op_type = op_desc_info.op_type;
+      ReplaceStringElem(op_name);
+      ReplaceStringElem(op_type);
+      string dump_file_path =
+          "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
+      GELOGI("[Dump][Exception] The exception dump file path is %s", dump_file_path.c_str());
+
+      uint64_t proto_size = dump_data.ByteSizeLong();
+      std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
+      bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
+      if (!ret || proto_size == 0) {
+        REPORT_INNER_ERROR("E19999", "Serialize proto to string fail");
+        GELOGE(PARAM_INVALID, "[Dump][Exception] Dump data proto serialize failed");
+        return PARAM_INVALID;
+      }
+
+      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), &proto_size, sizeof(uint64_t)),
+                        "Failed to dump proto size");
+      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), proto_msg.get(), proto_size),
+                        "Failed to dump proto msg");
+      if (DumpExceptionInput(op_desc_info, dump_file_path) != SUCCESS) {
+        GELOGE(PARAM_INVALID, "[Dump][Exception] Dump exception input failed");
+        return PARAM_INVALID;
+      }
+      if (DumpExceptionOutput(op_desc_info, dump_file_path) != SUCCESS) {
+        GELOGE(PARAM_INVALID, "[Dump][Exception] Dump exception output failed");
+        return PARAM_INVALID;
+      }
+      GELOGI("[Dump][Exception] Dump exception info SUCCESS");
+    } else {
+      GELOGE(PARAM_INVALID, "[Dump][Exception] Get op desc info failed,task id:%u,stream id:%u",
+             iter.taskid, iter.streamid);
+      return PARAM_INVALID;
+    }
+  }
+  return SUCCESS;
+}
+
+bool ExceptionDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
+  GELOGI("[Get][OpDescInfo] There are %zu op need to dump.", op_desc_info_.size());
+  for (size_t index = 0; index < op_desc_info_.size(); ++index) {
+    OpDescInfo dump_op_info = op_desc_info_.at(index);
+    if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) {
+      GELOGI("[Get][OpDescInfo] Find exception op [%s] of task_id: %u, stream_id: %u.",
+             dump_op_info.op_name.c_str(), task_id, stream_id);
+      op_desc_info = dump_op_info;
+      return true;
+    }
+  }
+  return false;
+}
+
+Status ExceptionDumper::DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file) const {
+  GELOGI("[Dump][ExceptionInput] Start to dump exception input");
+  for (size_t i = 0; i < op_desc_info.input_addrs.size(); i++) {
+    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.input_addrs.at(i), op_desc_info.input_size.at(i)) != SUCCESS) {
+      GELOGE(PARAM_INVALID, "[Dump][ExceptionInput] Dump the %zu input data of op [%s] failed",
+             i, op_desc_info.op_name.c_str());
+      return PARAM_INVALID;
+    }
+  }
+  return SUCCESS;
+}
+
+Status ExceptionDumper::DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file) const {
+  GELOGI("[Dump][ExceptionOutput] Start to dump exception output");
+  for (size_t i = 0; i < op_desc_info.output_addrs.size(); i++) {
+    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.output_addrs.at(i), op_desc_info.output_size.at(i)) !=
+        SUCCESS) {
+      GELOGE(PARAM_INVALID, "[Dump][ExceptionOutput] Dump the %zu output data of op [%s] failed",
+             i, op_desc_info.op_name.c_str());
+      return PARAM_INVALID;
+    }
+  }
+  return SUCCESS;
+}
+
+OpDescInfo *ExceptionDumper::MutableOpDescInfo(uint32_t task_id, uint32_t stream_id) {
+  for (OpDescInfo &op_desc_info : op_desc_info_) {
+    if (op_desc_info.task_id == task_id && op_desc_info.stream_id == stream_id) {
+      return &op_desc_info;
+    }
+  }
+  return nullptr;
+}
+}  // namespace ge
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_COMMON_DUMP_EXCEPTION_DUMPER_H_
+#define GE_COMMON_DUMP_EXCEPTION_DUMPER_H_
+
+#include <vector>
+
+#include "graph/op_desc.h"
+#include "framework/common/ge_types.h"
+#include "graph/load/model_manager/task_info/task_info.h"
+
+namespace ge {
+class ExceptionDumper {
+ public:
+  ExceptionDumper() = default;
+  ~ExceptionDumper();
+
+  void SaveDumpOpInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id,
+                      std::vector<void *> &input_addrs, std::vector<void *> &output_addrs);
+  void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id);
+  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const;
+  bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
+  OpDescInfo *MutableOpDescInfo(uint32_t task_id, uint32_t stream_id);
+
+ private:
+  void SaveOpDescInfo(const OpDescPtr &op, uint32_t task_id, uint32_t stream_id, OpDescInfo &op_desc_info);
+  Status DumpExceptionInput(const OpDescInfo &op_desc_info, const std::string &dump_file) const;
+  Status DumpExceptionOutput(const OpDescInfo &op_desc_info, const std::string &dump_file) const;
+
+  std::vector<OpDescInfo> op_desc_info_;
+};
+}  // namespace ge
+
+#endif  // GE_COMMON_DUMP_EXCEPTION_DUMPER_H_
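Taken together, the two new files form a small, self-contained exception-dump component. A hedged sketch of how it is wired up by the call sites later in this diff (DavinciModel and the hybrid executor); `model_param`, `op_desc`, and `model_manager` are stand-ins for the objects available at those call sites:

```cpp
ge::ExceptionDumper exception_dumper;

// While tasks are distributed, remember each op's task/stream ids, shapes, sizes and IO addresses.
exception_dumper.SaveDumpOpInfo(model_param, op_desc, task_id, stream_id);

// When the runtime reports failed tasks, dump every op whose (task_id, stream_id) matches:
// each op gets a file "./<op_type>.<op_name>.<task_id>.<timestamp>" holding the proto plus raw IO data.
std::vector<rtExceptionInfo> exception_infos = model_manager->GetExceptionInfos();
if (!exception_infos.empty()) {
  GE_CHK_STATUS_RET(exception_dumper.DumpExceptionInfo(exception_infos), "Dump exception info failed");
}
```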
@@ -16,6 +16,7 @@ set(SRC_LIST
     "../common/ge/plugin_manager.cc"
     "../common/ge/op_tiling_manager.cc"
     "../common/dump/dump_properties.cc"
+    "../common/dump/exception_dumper.cc"
    "../common/dump/dump_manager.cc"
     "../common/dump/dump_op.cc"
     "../common/dump/opdebug_register.cc"
@@ -72,24 +72,6 @@ static bool ParseNameIndex(const std::string &node_name_index, std::string &node
 static bool IsTensorDescWithSkipDumpAddrType(bool has_mem_type_attr, vector<int64_t> v_memory_type, size_t i) {
   return has_mem_type_attr && (v_memory_type[i] == RT_MEMORY_L1);
 }
-
-static uint64_t GetNowTime() {
-  uint64_t ret = 0;
-  mmTimeval tv;
-  if (mmGetTimeOfDay(&tv, nullptr) == 0) {
-    ret = tv.tv_sec * 1000000ULL + tv.tv_usec;
-  }
-  return ret;
-}
-
-static void ReplaceStringElem(std::string &str) {
-  for_each(str.begin(), str.end(), [](char &ch) {
-    if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) {
-      ch = '_';
-    }
-  });
-}
 }  // namespace
 
 static int32_t GetIrDataType(ge::DataType data_type) {
@@ -194,66 +176,6 @@ void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_de
   is_op_debug_ = is_op_debug;
 }
 
-void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id,
-                                uint32_t stream_id) {
-  GELOGD("Start SaveDumpOpInfo of task_id: %u, stream_id: %u", task_id, stream_id);
-  OpDescInfo op_desc_info;
-  op_desc_info.op_name = op->GetName();
-  op_desc_info.op_type = op->GetType();
-  op_desc_info.task_id = task_id;
-  op_desc_info.stream_id = stream_id;
-  for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
-    GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
-    if (input_tensor_desc == nullptr) {
-      continue;
-    }
-    op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat());
-    op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
-    op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType());
-    int64_t input_size = 0;
-    if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) {
-      GELOGW("Get input size failed");
-      return;
-    }
-    GELOGD("Save dump op info, the input size is %ld", input_size);
-    op_desc_info.input_size.emplace_back(input_size);
-  }
-  for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
-    GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
-    if (output_tensor_desc == nullptr) {
-      continue;
-    }
-    op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat());
-    op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
-    op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType());
-    int64_t output_size = 0;
-    if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) {
-      GELOGW("Get input size failed");
-      return;
-    }
-    GELOGD("Save dump op info, the output size is %ld", output_size);
-    op_desc_info.output_size.emplace_back(output_size);
-  }
-  op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op);
-  op_desc_info.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op);
-  op_desc_info_.emplace_back(op_desc_info);
-}
-
-bool DataDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
-  GELOGI("There are %zu op need to dump.", op_desc_info_.size());
-  for (size_t index = 0; index < op_desc_info_.size(); ++index) {
-    OpDescInfo dump_op_info = op_desc_info_.at(index);
-    if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) {
-      GELOGI("find exception op of task_id: %u, stream_id: %u.", task_id, stream_id);
-      op_desc_info = dump_op_info;
-      return true;
-    }
-  }
-  return false;
-}
-
 void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc,
                               uintptr_t args) {
   if (op_desc == nullptr) {
@@ -904,98 +826,4 @@ void DataDumper::PrintCheckLog(string &dump_list_key) {
     }
   }
 }
-
-Status DataDumper::DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file) {
-  GELOGI("Start to dump exception input");
-  for (size_t i = 0; i < op_desc_info.input_addrs.size(); i++) {
-    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.input_addrs.at(i), op_desc_info.input_size.at(i)) != SUCCESS) {
-      GELOGE(PARAM_INVALID, "Dump the %zu input data failed", i);
-      return PARAM_INVALID;
-    }
-  }
-  return SUCCESS;
-}
-
-Status DataDumper::DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file) {
-  GELOGI("Start to dump exception output");
-  for (size_t i = 0; i < op_desc_info.output_addrs.size(); i++) {
-    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.output_addrs.at(i), op_desc_info.output_size.at(i)) !=
-        SUCCESS) {
-      GELOGE(PARAM_INVALID, "Dump the %zu input data failed", i);
-      return PARAM_INVALID;
-    }
-  }
-  return SUCCESS;
-}
-
-Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exception_infos) {
-  GELOGI("Start to dump exception info");
-  for (const rtExceptionInfo &iter : exception_infos) {
-    OpDescInfo op_desc_info;
-    if (GetOpDescInfo(iter.streamid, iter.taskid, op_desc_info)) {
-      toolkit::dumpdata::DumpData dump_data;
-      dump_data.set_version("2.0");
-      dump_data.set_dump_time(GetNowTime());
-      dump_data.set_op_name(op_desc_info.op_name);
-      for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
-        toolkit::dumpdata::OpInput input;
-        input.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.input_data_type[i])));
-        input.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.input_format[i]));
-        for (auto dim : op_desc_info.input_shape[i]) {
-          input.mutable_shape()->add_dim(dim);
-        }
-        input.set_size(op_desc_info.input_size[i]);
-        GELOGI("The input size int exception is %ld", op_desc_info.input_size[i]);
-        dump_data.mutable_input()->Add(std::move(input));
-      }
-      for (size_t j = 0; j < op_desc_info.output_format.size(); ++j) {
-        toolkit::dumpdata::OpOutput output;
-        output.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.output_data_type[j])));
-        output.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.output_format[j]));
-        for (auto dim : op_desc_info.output_shape[j]) {
-          output.mutable_shape()->add_dim(dim);
-        }
-        output.set_size(op_desc_info.output_size[j]);
-        GELOGI("The output size int exception is %ld", op_desc_info.output_size[j]);
-        dump_data.mutable_output()->Add(std::move(output));
-      }
-      uint64_t now_time = GetNowTime();
-      std::string op_name = op_desc_info.op_name;
-      std::string op_type = op_desc_info.op_type;
-      ReplaceStringElem(op_name);
-      ReplaceStringElem(op_type);
-      string dump_file_path =
-          "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
-      GELOGI("The exception dump file path is %s", dump_file_path.c_str());
-      uint64_t proto_size = dump_data.ByteSizeLong();
-      std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
-      bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
-      if (!ret || proto_size == 0) {
-        REPORT_INNER_ERROR("E19999", "Serialize proto to string fail");
-        GELOGE(PARAM_INVALID, "Dump data proto serialize failed");
-        return PARAM_INVALID;
-      }
-      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), &proto_size, sizeof(uint64_t)),
-                        "Failed to dump proto size");
-      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), proto_msg.get(), proto_size),
-                        "Failed to dump proto msg");
-      if (DumpExceptionInput(op_desc_info, dump_file_path) != SUCCESS) {
-        GELOGE(PARAM_INVALID, "Dump exception input failed");
-        return PARAM_INVALID;
-      }
-      if (DumpExceptionOutput(op_desc_info, dump_file_path) != SUCCESS) {
-        GELOGE(PARAM_INVALID, "Dump exception output failed");
-        return PARAM_INVALID;
-      }
-      GELOGI("Dump exception info SUCCESS");
-    } else {
-      GELOGE(PARAM_INVALID, "Get op desc info failed,task id:%u,stream id:%u", iter.taskid, iter.streamid);
-      return PARAM_INVALID;
-    }
-  }
-  return SUCCESS;
-}
 }  // namespace ge
@@ -70,8 +70,6 @@ class DataDumper {
   void SaveDumpInput(const std::shared_ptr<Node> &node);
 
-  void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id);
-
   // args is device memory stored first output addr
   void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args);
 
   void SaveEndGraphId(uint32_t task_id, uint32_t stream_id);
@@ -87,14 +85,8 @@ class DataDumper {
   void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
 
   const DumpProperties &GetDumpProperties() const { return dump_properties_; }
 
-  bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
   const std::vector<OpDescInfo> &GetAllOpDescInfo() const { return op_desc_info_; }
 
-  // Dump exception info
-  Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file);
-  Status DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file);
-  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> exception_infos);
-
  private:
   void ReleaseDevMem(void **ptr) noexcept;
@@ -2656,9 +2656,9 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b
     GE_CHECK_NOTNULL(model_manager);
     auto exception_infos = model_manager->GetExceptionInfos();
     if (exception_infos.size() > 0) {
-      GE_CHK_STATUS_RET(data_dumper_.DumpExceptionInfo(exception_infos), "Dump exception info failed");
+      GE_CHK_STATUS_RET(DumpExceptionInfo(exception_infos), "[Dump][Exception] Dump exception info failed.");
     } else {
-      GELOGI("Exception info is null");
+      GELOGI("[Dump][Exception] Exception info is null.");
     }
     GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed.");
     return INTERNAL_ERROR;
@@ -4352,4 +4352,37 @@ Status DavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callbac
   listener->SetCallback(callback);
   return SUCCESS;
 }
+
+void DavinciModel::UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const std::vector<void *> &io_addrs) {
+  if (fixed_mem_base_ == reinterpret_cast<uintptr_t>(mem_base_)) {
+    GELOGD("[Update][OpIOAddrs] No need to update op input output addr.");
+    return;
+  }
+
+  OpDescInfo *op_desc_info = exception_dumper_.MutableOpDescInfo(task_id, stream_id);
+  if (op_desc_info == nullptr) {
+    GELOGW("[Update][OpIOAddrs] Find op desc failed, task_id: %u, stream_id: %u.", task_id, stream_id);
+    return;
+  }
+  size_t input_size = op_desc_info->input_addrs.size();
+  size_t output_size = op_desc_info->output_addrs.size();
+  if (input_size + output_size != io_addrs.size()) {
+    GELOGW("[Update][OpIOAddrs] Op[%s] input size[%zu] and output size[%zu] is not equal to io addr size[%zu]",
+           op_desc_info->op_name.c_str(), input_size, output_size, io_addrs.size());
+    return;
+  }
+
+  vector<void *> input_addrs;
+  vector<void *> output_addrs;
+  for (size_t i = 0; i < io_addrs.size(); i++) {
+    if (i < input_size) {
+      input_addrs.emplace_back(GetRunAddress(io_addrs[i]));
+    } else {
+      output_addrs.emplace_back(GetRunAddress(io_addrs[i]));
+    }
+  }
+  op_desc_info->input_addrs = input_addrs;
+  op_desc_info->output_addrs = output_addrs;
+  GELOGD("[Update][OpIOAddrs] Op [%s] update input output addr success.", op_desc_info->op_name.c_str());
+}
+
 }  // namespace ge
@@ -29,6 +29,7 @@
 #include "common/helper/om_file_helper.h"
 #include "common/opskernel/ge_task_info.h"
 #include "common/properties_manager.h"
+#include "common/dump/exception_dumper.h"
 #include "common/dump/opdebug_register.h"
 #include "common/types.h"
 #include "framework/common/util.h"
@@ -476,13 +477,17 @@ class DavinciModel {
   Status ReportProfilingData();
 
   void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
-    data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
+    exception_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
   }
 
   void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) {
     data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
   }
 
+  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) const {
+    return exception_dumper_.DumpExceptionInfo(exception_infos);
+  }
+
   void SetKnownShapeGlobalStep(void *global_step) {
     known_shape_global_step_ = global_step;
   }
@@ -562,8 +567,9 @@ class DavinciModel {
   const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
 
   bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
-    return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
+    return exception_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
   }
+  void UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const std::vector<void *> &io_addrs);
 
   bool GetRunningFlag() const { return running_flg_; }
   void SetRunningFlag(bool flag) { running_flg_ = flag; }
@@ -1012,6 +1018,7 @@ class DavinciModel {
   int64_t maxDumpOpNum_;
   // for data dump
   DataDumper data_dumper_;
+  ExceptionDumper exception_dumper_;
   OpdebugRegister opdebug_register_;
   uint64_t iterator_count_;
   bool is_l1_fusion_enable_;
@@ -280,6 +280,7 @@ ModelManager::~ModelManager() {
   model_map_.clear();
   model_aicpu_kernel_.clear();
   cust_aicpu_so_.clear();
+  dump_exception_flag_ = false;
 
   GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0)));
 }
@@ -1587,9 +1588,21 @@ Status ModelManager::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint3
   for (const auto &model : model_map_) {
     auto davinci_model = model.second;
     if (davinci_model->GetDeviceId() == device_id) {
-      GELOGI("Start to GetOpDescInfo of device_id: %u.", device_id);
+      GELOGI("[Get][OpDescInfo] Start to GetOpDescInfo of device_id: %u in davinci model.", device_id);
       if (davinci_model->GetOpDescInfo(stream_id, task_id, op_desc_info)) {
-        GELOGI("Find specific node of stream_id: %u, task_id: %u.", stream_id, task_id);
+        GELOGI("[Get][OpDescInfo] Find specific node of stream_id: %u, task_id: %u in davinci model.",
+               stream_id, task_id);
+        return SUCCESS;
+      }
+    }
+  }
+  for (const auto &model : hybrid_model_map_) {
+    auto hybrid_model = model.second;
+    if (hybrid_model->GetDeviceId() == device_id) {
+      GELOGI("[Get][OpDescInfo] Start to GetOpDescInfo of device_id: %u in hybrid model.", device_id);
+      if (hybrid_model->GetOpDescInfo(stream_id, task_id, op_desc_info)) {
+        GELOGI("[Get][OpDescInfo] Find specific node of stream_id: %u, task_id: %u in hybrid model.",
+               stream_id, task_id);
         return SUCCESS;
       }
     }
   }
@@ -1602,6 +1615,7 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
   if (iter != options.end()) {
     GELOGI("Find option enable_exeception_dump is %s", iter->second.c_str());
     if (iter->second == "1") {
+      dump_exception_flag_ = true;
       rtError_t rt_ret = rtSetTaskFailCallback(reinterpret_cast<rtTaskFailCallback>(ExceptionCallback));
       if (rt_ret != RT_ERROR_NONE) {
         REPORT_CALL_ERROR("E19999", "Call rtSetTaskFailCallback fail, ret = 0x%X",
@@ -313,6 +313,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
     instance->AddExceptionInfo(*rt_exception_info);
   }
 
+  bool IsDumpExceptionOpen() { return dump_exception_flag_; }
  private:
   ///
   /// @ingroup domi_ome
@@ -356,6 +357,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_;
 
   static DumpProperties dump_properties_;
+  bool dump_exception_flag_ = false;
 };
 }  // namespace ge
@@ -357,6 +357,7 @@ void KernelExTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
 Status KernelExTaskInfo::UpdateArgs() {
   GELOGI("KernelExTaskInfo::UpdateArgs in.");
   davinci_model_->SetTotalIOAddrs(io_addrs_);
+  davinci_model_->UpdateOpIOAddrs(task_id_, stream_id_, io_addrs_);
   GELOGI("KernelExTaskInfo::UpdateArgs success.");
   return SUCCESS;
 }
@@ -523,6 +523,7 @@ Status KernelTaskInfo::UpdateArgs() {
       return CopyNoncontinuousArgs(io_addr_offset_);
     }
     davinci_model_->SetTotalIOAddrs(io_addrs_);
+    davinci_model_->UpdateOpIOAddrs(task_id_, stream_id_, io_addrs_);
   } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
     return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead));
   }
@@ -63,5 +63,27 @@ Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) {
   REPORT_CALL_ERROR("E19999", "invoke rtStreamSynchronize failed, ret = %d", rt_ret);
   return RT_FAILED;
 }
+
+Status GraphExecutionContext::DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos) {
+  if (exception_infos.empty()) {
+    GELOGI("[Dump][ExceptionInfo] Exception info is null.");
+    return SUCCESS;
+  }
+  GELOGI("[Dump][ExceptionInfo] Start to search dynamic op info and to dump.");
+  if (exception_dumper.DumpExceptionInfo(exception_infos) != SUCCESS) {
+    GELOGE(FAILED, "[Dump][Exception] Dump dynamic op exception info failed.");
+    return FAILED;
+  }
+  GELOGI("[Dump][ExceptionInfo] Start to search static op info and to dump.");
+  for (const auto &iter : davinci_model) {
+    if (iter != nullptr) {
+      if (iter->DumpExceptionInfo(exception_infos) != SUCCESS) {
+        GELOGE(FAILED, "[Dump][ExceptionInfo] Dump static op exception info failed.");
+        return FAILED;
+      }
+    }
+  }
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
@@ -23,6 +23,7 @@
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "graph/ge_local_context.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "hybrid/common/npu_memory_allocator.h"
 #include "hybrid/common/tensor_value.h"
 #include "hybrid/executor/hybrid_profiler.h"
@@ -54,6 +55,7 @@ struct GraphExecutionContext {
   void SetErrorCode(Status error_code);
   Status GetStatus() const;
   Status Synchronize(rtStream_t rt_stream);
+  Status DumpExceptionInfo(const std::vector<rtExceptionInfo> &exception_infos);
 
   uint64_t session_id = 0;
   uint64_t context_id = 0;
@@ -68,6 +70,8 @@ struct GraphExecutionContext {
   DumpProperties dump_properties;
   bool trace_enabled = false;
   bool dump_enabled = false;
+  ExceptionDumper exception_dumper;
+  std::vector<std::shared_ptr<ge::DavinciModel>> davinci_model;
   std::atomic_bool is_eos_{false};
   long profiling_level = 0;
   long iteration = 0;
@@ -61,6 +61,8 @@ class HybridModelAsyncExecutor {
   void SetRunningFlag(bool flag) { running_flag_ = flag; }
 
+  const GraphExecutionContext * GeContext() { return executor_->GetContext(); }
+
  private:
   Status InitInputDesc();
@@ -18,6 +18,7 @@
 #include "graph/ge_context.h"
 #include "graph/runtime_inference_context.h"
 #include "graph/utils/tensor_utils.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "common/dump/dump_manager.h"
 #include "common/profiling/profiling_manager.h"
@@ -102,7 +103,17 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
   }
 
   if (!model_->IsSingleOp()) {
-    HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
+    Status ret = executor.Synchronize();
+    if (ret != ge::SUCCESS) {
+      auto model_manager = ModelManager::GetInstance();
+      GE_CHECK_NOTNULL(model_manager);
+      auto exception_infos = model_manager->GetExceptionInfos();
+      if (!exception_infos.empty()) {
+        HYBRID_CHK_STATUS_RET(context_.DumpExceptionInfo(exception_infos),
+                              "[Execute][GraphInternal] Dump exception info failed.");
+      }
+      GELOGE(ret, "[Execute][GraphInternal] Synchronize failed.");
+    }
     RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
   }
@@ -4,6 +4,7 @@
 #include "common/dump/dump_manager.h"
 #include "graph/ge_context.h"
 #include "graph/runtime_inference_context.h"
+#include "graph/load/model_manager/model_manager.h"
 
 namespace ge {
 namespace hybrid {
@@ -266,6 +267,13 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
     ret = stage_executors_[i]->Synchronize();
 
     if (ret != SUCCESS) {
+      auto model_manager = ModelManager::GetInstance();
+      GE_CHECK_NOTNULL(model_manager);
+      auto exception_infos = model_manager->GetExceptionInfos();
+      if (!exception_infos.empty()) {
+        HYBRID_CHK_STATUS_RET(context_.DumpExceptionInfo(exception_infos),
+                              "[Execute][GraphInternal] Dump exception info failed.");
+      }
       GELOGE(ret, "[Invoke][Synchronize] failed for [Executor: %zu].", i);
       REPORT_CALL_ERROR("E19999", "[Executor: %zu] failed to Synchronize result.", i);
       has_error = true;
@@ -19,6 +19,7 @@
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/tensor_adapter.h"
 #include "graph/debug/ge_attr_define.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "hybrid/node_executor/node_executor.h"
 #include "hybrid/executor//worker//shape_inference_engine.h"
 #include "common/dump/dump_op.h"
@@ -70,6 +71,7 @@ class NodeDoneCallback {
   Status PrepareConstInputs(const NodeItem &node_item);
   Status DumpDynamicNode();
   Status ProfilingReport();
+  Status SaveDumpOpInfo();
   Status GetTaskDescInfo(const NodePtr node, const HybridModel *model,
                          std::vector<TaskDescInfo> &task_desc_info);
   GraphExecutionContext *graph_context_;
@@ -266,6 +268,40 @@ Status NodeDoneCallback::DumpDynamicNode() {
   return SUCCESS;
 }
 
+Status NodeDoneCallback::SaveDumpOpInfo() {
+  GE_CHECK_NOTNULL(graph_context_);
+  GE_CHECK_NOTNULL(graph_context_->model);
+
+  auto node = context_->GetNodeItem().node;
+  if (node == nullptr) {
+    GELOGE(PARAM_INVALID, "[Save][DumpOpInfo] Get node is nullptr.");
+    return PARAM_INVALID;
+  }
+  auto op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+
+  vector<void *> input_addrs;
+  vector<void *> output_addrs;
+  for (int i = 0; i < context_->NumInputs(); i++) {
+    auto tensor_value = context_->GetInput(i);
+    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "[Save][DumpOpInfo] Tensor value is nullptr.");
+    void *input_addr = const_cast<void *>(tensor_value->GetData());
+    input_addrs.emplace_back(input_addr);
+  }
+  for (int j = 0; j < context_->NumOutputs(); j++) {
+    auto tensor_value = context_->GetOutput(j);
+    GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "[Save][DumpOpInfo] Tensor value is nullptr.");
+    void *output_addr = const_cast<void *>(tensor_value->GetData());
+    output_addrs.emplace_back(output_addr);
+  }
+
+  uint32_t stream_id = context_->GetStreamId();
+  uint32_t task_id = context_->GetTaskId();
+  graph_context_->exception_dumper.SaveDumpOpInfo(op_desc, task_id, stream_id, input_addrs, output_addrs);
+
+  return SUCCESS;
+}
+
 Status NodeDoneCallback::OnNodeDone() {
   auto &node_item = context_->GetNodeItem();
   GELOGI("[%s] Start callback process.", node_item.NodeName().c_str());
@@ -278,6 +314,12 @@ Status NodeDoneCallback::OnNodeDone() {
     GE_CHK_STATUS_RET(DumpDynamicNode(), "[Call][DumpDynamicNode] Failed.");
   }
 
+  auto model_manager = ModelManager::GetInstance();
+  GE_CHECK_NOTNULL(model_manager);
+  if (model_manager->IsDumpExceptionOpen()) {
+    GE_CHK_STATUS_RET(SaveDumpOpInfo(), "[Save][DumpOpInfo] Failed to dump op info.");
+  }
+
   if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
     GE_CHK_STATUS_RET(ProfilingReport(), "[Report][Profiling] of node[%s] failed.", node_item.NodeName().c_str());
   }
@@ -82,6 +82,12 @@ class HybridDavinciModel::Impl {
     model_.SetOmName(model_name);
   }
 
+  uint32_t GetDeviceId() {
+    return model_.GetDeviceId();
+  }
+
+  const GraphExecutionContext * GeContext() { return executor_.GeContext(); }
+
   uint64_t GetSessionId() {
     return model_.GetSessionId();
   }
@@ -199,6 +205,11 @@ void HybridDavinciModel::SetOmName(const string &om_name) {
   }
 }
 
+uint32_t HybridDavinciModel::GetDeviceId() const {
+  GE_CHECK_NOTNULL(impl_);
+  return impl_->GetDeviceId();
+}
+
 Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
   GE_CHECK_NOTNULL(impl_);
   return impl_->GetDynamicBatchInfo(batch_info, dynamic_type);
@@ -245,5 +256,22 @@ bool HybridDavinciModel::GetRunningFlag() const { return impl_->GetRunningFlag()
 Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
   return impl_->SetRunAsyncListenerCallback(callback);
 }
+
+bool HybridDavinciModel::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
+  if (impl_ == nullptr) {
+    return false;
+  }
+  auto context = impl_->GeContext();
+  GE_CHECK_NOTNULL(context);
+  bool ret = context->exception_dumper.GetOpDescInfo(stream_id, task_id, op_desc_info);
+  if (!ret) {
+    for (const auto &iter : context->davinci_model) {
+      if (iter->GetOpDescInfo(stream_id, task_id, op_desc_info)) {
+        return true;
+      }
+    }
+  }
+  return ret;
+}
 }  // namespace hybrid
 }  // namespace ge
@@ -61,6 +61,8 @@ class HybridDavinciModel {
   uint64_t GetSessionId();
 
+  uint32_t GetDeviceId() const;
+
   Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
 
   void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order);
@@ -80,6 +82,8 @@ class HybridDavinciModel {
   Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);
 
+  bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
+
  private:
   HybridDavinciModel() = default;
   class Impl;
@@ -72,6 +72,10 @@ uint32_t HybridDavinciModel::GetDataInputerSize() {
   return 0;
 }
 
+uint32_t HybridDavinciModel::GetDeviceId() const {
+  return 0;
+}
+
 Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
   return UNSUPPORTED;
 }
@@ -99,5 +103,9 @@ bool HybridDavinciModel::GetRunningFlag() const {
 Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
   return UNSUPPORTED;
 }
+
+bool HybridDavinciModel::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
+  return true;
+}
 }  // namespace hybrid
 }  // namespace ge
@@ -208,6 +208,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
       REPORT_CALL_ERROR("E19999", "rtGetTaskIdAndStreamID failed, ret: 0x%X.", rt_ret);
       return RT_ERROR_TO_GE_STATUS(rt_ret);
     }
+    context.SetTaskId(task_id);
+    context.SetStreamId(stream_id);
     GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
     (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim());
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
@@ -208,6 +208,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
     REPORT_CALL_ERROR("E19999", "rtGetTaskIdAndStreamID failed, ret: 0x%X.", rt_ret);
     return RT_ERROR_TO_GE_STATUS(rt_ret);
   }
+  context.SetTaskId(task_id);
+  context.SetStreamId(stream_id);
   GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
   (void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0);
   auto callback = [=, &context]() {
@@ -118,6 +118,14 @@ Status KnownNodeTask::Init(TaskContext &context) {
                                                          davinci_model_->SubModelId()),
                     "[Destroy][AicpuKernel] failed, session_id:%lu, model_id:%u, sub_model_id:%u",
                     davinci_model_->GetSessionId(), davinci_model_->Id(), davinci_model_->SubModelId());
+  if (!load_flag_) {
+    auto execution_context = const_cast<GraphExecutionContext *>(context.GetExecutionContext());
+    GE_CHECK_NOTNULL(execution_context);
+    auto &davinci_model = execution_context->davinci_model;
+    davinci_model.emplace_back(davinci_model_);
+    load_flag_ = true;
+  }
+
   GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName());
   return SUCCESS;
 }
@@ -42,6 +42,7 @@ class KnownNodeTask : public NodeTask {
   virtual Status DoInitDavinciModel(void *weight, size_t weight_size);
  private:
   std::shared_ptr<DavinciModel> davinci_model_ = nullptr;
+  bool load_flag_ = false;
 };
 
 class KnownNodeExecutor : public NodeExecutor {
@@ -1 +1 @@
-Subproject commit df9abef65f902f37ca664f6dda4c60727dac2aca
+Subproject commit 424ac0609fe17f455865436462a2c62f85aea2b1
@@ -166,6 +166,7 @@ set(COMMON_SRC_FILES
     "${GE_CODE_DIR}/ge/common/dump/dump_properties.cc"
     "${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
     "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
+    "${GE_CODE_DIR}/ge/common/dump/exception_dumper.cc"
    "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc"
     "${GE_CODE_DIR}/ge/common/dump/dump_op.cc"
     "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
@@ -756,6 +757,7 @@ set(MULTI_PARTS_TEST_FILES
     "common/datatype_transfer_unittest.cc"
     "common/dump_manager_unittest.cc"
     "common/dump_op_unittest.cc"
+    "common/dump_exception_unittest.cc"
     "common/opdebug_register_unittest.cc"
     "common/format_transfer_unittest.cc"
     "common/format_transfer_transpose_unittest.cc"
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#define protected public
+#define private public
+#include "common/dump/exception_dumper.h"
+#include "common/debug/log.h"
+#include "common/ge_inner_error_codes.h"
+#undef private
+#undef protected
+
+namespace ge {
+class UTEST_dump_exception : public testing::Test {
+ protected:
+  void SetUp() {}
+  void TearDown() {}
+};
+
+TEST_F(UTEST_dump_exception, save_dump_op_info_success) {
+  OpDescPtr op_desc = std::make_shared<OpDesc>("GatherV2", "GatherV2");
+  uint32_t task_id = 1;
+  uint32_t stream_id = 233;
+  vector<void *> input_addr;
+  vector<void *> output_addr;
+
+  ExceptionDumper exception_dumper;
+  exception_dumper.SaveDumpOpInfo(op_desc, task_id, stream_id, input_addr, output_addr);
+}
+
+TEST_F(UTEST_dump_exception, dump_exception_info) {
+  rtExceptionInfo exception_info = {1, 2, 3, 4, 5};
+  std::vector<rtExceptionInfo> exception_infos = { exception_info };
+  OpDescInfo op_desc_info = {"Save", "Save", 1, 2, {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {}, {2},
+                             {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {}, {2}};
+
+  ExceptionDumper exception_dumper;
+  exception_dumper.op_desc_info_ = { op_desc_info };
+  exception_dumper.DumpExceptionInfo(exception_infos);
+}
+}  // namespace ge
@@ -1034,4 +1034,16 @@ TEST_F(UtestDavinciModel, NnExecute) {
   model.task_list_.resize(1);
   EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS);
 }
+
+TEST_F(UtestDavinciModel, update_io_addr_success) {
+  DavinciModel model(0, nullptr);
+  uint32_t task_id = 1;
+  uint32_t stream_id = 2;
+  model.fixed_mem_base_ = 0x22;
+  model.mem_base_ = reinterpret_cast<uint8_t *>(&task_id);
+  OpDescInfo op_desc_info = {"Save", "Save", 1, 2, {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {nullptr}, {2},
+                             {FORMAT_NCHW}, {{1}}, {DT_FLOAT}, {nullptr}, {2}};
+  model.exception_dumper_.op_desc_info_ = { op_desc_info };
+  vector<void *> io_addr = {nullptr, nullptr};
+  model.UpdateOpIOAddrs(task_id, stream_id, io_addr);
+}
 }  // namespace ge