From d29b8c994b62a1d4dec8a510a8f1e25a8d976548 Mon Sep 17 00:00:00 2001 From: caifubi Date: Tue, 30 Jun 2020 16:48:11 +0800 Subject: [PATCH] Data dump for ge_runtime --- inc/framework/ge_runtime/model_runner.h | 5 +- inc/framework/ge_runtime/task_info.h | 95 ++++++++++++++----------- src/ge/ge_runtime/model_runner.cc | 20 ++++++ src/ge/ge_runtime/runtime_model.cc | 30 ++++++-- src/ge/ge_runtime/runtime_model.h | 5 +- src/ge/ge_runtime/task/aicpu_task.cc | 14 ++-- src/ge/ge_runtime/task/aicpu_task.h | 6 ++ src/ge/ge_runtime/task/task.h | 6 ++ src/ge/ge_runtime/task/tbe_task.cc | 7 +- src/ge/ge_runtime/task/tbe_task.h | 4 ++ 10 files changed, 135 insertions(+), 57 deletions(-) diff --git a/inc/framework/ge_runtime/model_runner.h b/inc/framework/ge_runtime/model_runner.h index 26e1b739..8e312b09 100644 --- a/inc/framework/ge_runtime/model_runner.h +++ b/inc/framework/ge_runtime/model_runner.h @@ -28,18 +28,21 @@ namespace ge { namespace model_runner { class RuntimeModel; - +using RuntimeInfo = std::tuple; class ModelRunner { public: static ModelRunner &Instance(); bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id, std::shared_ptr davinci_model, std::shared_ptr listener); + bool LoadModelComplete(uint32_t model_id); const std::vector &GetTaskIdList(uint32_t model_id) const; const std::vector &GetStreamIdList(uint32_t model_id) const; + const std::map> &GetRuntimeInfoMap(uint32_t model_id) const; + bool UnloadModel(uint32_t model_id); bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h index bfc5883f..68d71870 100644 --- a/inc/framework/ge_runtime/task_info.h +++ b/inc/framework/ge_runtime/task_info.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "cce/taskdown_api.h" @@ -52,21 +53,27 @@ class TaskInfo { virtual ~TaskInfo() {} uint32_t stream_id() const { return stream_id_; } TaskInfoType 
type() const { return type_; } + std::string op_name() const { return op_name_; } + bool dump_flag() const { return dump_flag_; } protected: - TaskInfo(uint32_t stream_id, TaskInfoType type) : stream_id_(stream_id), type_(type) {} + TaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, bool dump_flag) + : op_name_(op_name), stream_id_(stream_id), type_(type), dump_flag_(dump_flag) {} private: + std::string op_name_; uint32_t stream_id_; TaskInfoType type_; + bool dump_flag_; }; class CceTaskInfo : public TaskInfo { public: - CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim, - const std::vector &args, uint32_t args_size, const std::vector &sm_desc, - const std::vector &flow_table, const std::vector &args_offset, bool is_flowtable) - : TaskInfo(stream_id, TaskInfoType::CCE), + CceTaskInfo(const std::string &op_name, uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, + uint32_t block_dim, const std::vector &args, uint32_t args_size, + const std::vector &sm_desc, const std::vector &flow_table, + const std::vector &args_offset, bool is_flowtable) + : TaskInfo(op_name, stream_id, TaskInfoType::CCE, false), ctx_(ctx), stub_func_(stub_func), block_dim_(block_dim), @@ -102,11 +109,11 @@ class CceTaskInfo : public TaskInfo { class TbeTaskInfo : public TaskInfo { public: - TbeTaskInfo(uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, const std::vector &args, - uint32_t args_size, const std::vector &sm_desc, void *binary, uint32_t binary_size, - const std::vector &meta_data, const std::vector &input_data_addrs, - const std::vector &output_data_addrs, const std::vector &workspace_addrs) - : TaskInfo(stream_id, TaskInfoType::TBE), + TbeTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, + const std::vector &args, uint32_t args_size, const std::vector &sm_desc, void *binary, + uint32_t 
binary_size, const std::vector &meta_data, const std::vector &input_data_addrs, + const std::vector &output_data_addrs, const std::vector &workspace_addrs, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::TBE, dump_flag), stub_func_(stub_func), block_dim_(block_dim), args_(args), @@ -153,9 +160,10 @@ class TbeTaskInfo : public TaskInfo { class AicpuTaskInfo : public TaskInfo { public: - AicpuTaskInfo(uint32_t stream_id, const string &so_name, const std::string &kernel_name, const std::string &node_def, - const std::vector &input_data_addrs, const std::vector &output_data_addrs) - : TaskInfo(stream_id, TaskInfoType::AICPU), + AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const string &so_name, const std::string &kernel_name, + const std::string &node_def, const std::vector &input_data_addrs, + const std::vector &output_data_addrs, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag), so_name_(so_name), kernel_name_(kernel_name), node_def_(node_def), @@ -179,8 +187,8 @@ class AicpuTaskInfo : public TaskInfo { class LabelSetTaskInfo : public TaskInfo { public: - LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id) - : TaskInfo(stream_id, TaskInfoType::LABEL_SET), label_id_(label_id) {} + LabelSetTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) + : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SET, false), label_id_(label_id) {} ~LabelSetTaskInfo() override {} uint32_t label_id() const { return label_id_; } @@ -190,8 +198,8 @@ class LabelSetTaskInfo : public TaskInfo { class LabelGotoTaskInfo : public TaskInfo { public: - LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id) - : TaskInfo(stream_id, TaskInfoType::LABEL_GOTO), label_id_(label_id) {} + LabelGotoTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_id) + : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_GOTO, false), label_id_(label_id) {} ~LabelGotoTaskInfo() override {} uint32_t label_id() 
const { return label_id_; } @@ -201,8 +209,9 @@ class LabelGotoTaskInfo : public TaskInfo { class LabelSwitchTaskInfo : public TaskInfo { public: - LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_size, const std::vector &label_list, void *cond) - : TaskInfo(stream_id, TaskInfoType::LABEL_SWITCH), + LabelSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t label_size, + const std::vector &label_list, void *cond) + : TaskInfo(op_name, stream_id, TaskInfoType::LABEL_SWITCH, false), label_size_(label_size), label_list_(label_list), cond_(cond) {} @@ -222,8 +231,8 @@ class EventTaskInfo : public TaskInfo { uint32_t event_id() const { return event_id_; } protected: - EventTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t event_id) - : TaskInfo(stream_id, type), event_id_(event_id) {} + EventTaskInfo(const std::string &op_name, uint32_t stream_id, TaskInfoType type, uint32_t event_id) + : TaskInfo(op_name, stream_id, type, false), event_id_(event_id) {} virtual ~EventTaskInfo() override {} uint32_t event_id_; @@ -231,39 +240,41 @@ class EventTaskInfo : public TaskInfo { class EventRecordTaskInfo : public EventTaskInfo { public: - EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {} + EventRecordTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) + : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_RECORD, event_id) {} ~EventRecordTaskInfo() override {} }; class EventWaitTaskInfo : public EventTaskInfo { public: - EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {} + EventWaitTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t event_id) + : EventTaskInfo(op_name, stream_id, TaskInfoType::EVENT_WAIT, event_id) {} ~EventWaitTaskInfo() override {} }; class FusionStartTaskInfo : public TaskInfo { public: - explicit FusionStartTaskInfo(uint32_t stream_id) : 
TaskInfo(stream_id, TaskInfoType::FUSION_START) {} + explicit FusionStartTaskInfo(const std::string &op_name, uint32_t stream_id) + : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_START, false) {} ~FusionStartTaskInfo() override {} }; class FusionEndTaskInfo : public TaskInfo { public: - explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {} + explicit FusionEndTaskInfo(const std::string &op_name, uint32_t stream_id) + : TaskInfo(op_name, stream_id, TaskInfoType::FUSION_END, false) {} ~FusionEndTaskInfo() override {} }; class HcclTaskInfo : public TaskInfo { public: - HcclTaskInfo(uint32_t stream_id, const std::string hccl_type, void *input_data_addr, void *output_data_addr, - void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, + HcclTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string hccl_type, void *input_data_addr, + void *output_data_addr, void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, int64_t op_type, int64_t data_type, const std::string &group, std::function hcom_bind_model, std::function hcom_unbind_model, - std::function, void *)> hcom_distribute_task) - : TaskInfo(stream_id, TaskInfoType::HCCL), + std::function, void *)> hcom_distribute_task, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::HCCL, dump_flag), hccl_type_(hccl_type), input_data_addr_(input_data_addr), output_data_addr_(output_data_addr), @@ -322,8 +333,11 @@ class HcclTaskInfo : public TaskInfo { class ProfilerTraceTaskInfo : public TaskInfo { public: - ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) - : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {} + ProfilerTraceTaskInfo(const std::string &op_name, uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) + : TaskInfo(op_name, 
stream_id, TaskInfoType::PROFILER_TRACE, false), + log_id_(log_id), + notify_(notify), + flat_(flat) {} ~ProfilerTraceTaskInfo() override {} uint64_t log_id() const { return log_id_; } @@ -338,8 +352,9 @@ class ProfilerTraceTaskInfo : public TaskInfo { class MemcpyAsyncTaskInfo : public TaskInfo { public: - MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind) - : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC), + MemcpyAsyncTaskInfo(const std::string &op_name, uint32_t stream_id, void *dst, uint64_t dst_max, void *src, + uint64_t count, uint32_t kind, bool dump_flag) + : TaskInfo(op_name, stream_id, TaskInfoType::MEMCPY_ASYNC, dump_flag), dst_(dst), dst_max_(dst_max), src_(src), @@ -363,9 +378,9 @@ class MemcpyAsyncTaskInfo : public TaskInfo { class StreamSwitchTaskInfo : public TaskInfo { public: - StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond, - int64_t data_type) - : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH), + StreamSwitchTaskInfo(const std::string &op_name, uint32_t stream_id, int64_t true_stream_id, void *input_addr, + void *value_addr, int64_t cond, int64_t data_type) + : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_SWITCH, false), true_stream_id_(true_stream_id), input_addr_(input_addr), value_addr_(value_addr), @@ -389,8 +404,8 @@ class StreamSwitchTaskInfo : public TaskInfo { class StreamActiveTaskInfo : public TaskInfo { public: - StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id) - : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {} + StreamActiveTaskInfo(const std::string &op_name, uint32_t stream_id, uint32_t active_stream_id) + : TaskInfo(op_name, stream_id, TaskInfoType::STREAM_ACTIVE, false), active_stream_id_(active_stream_id) {} ~StreamActiveTaskInfo() override {} uint32_t active_stream_id() const { return active_stream_id_; } diff --git 
a/src/ge/ge_runtime/model_runner.cc b/src/ge/ge_runtime/model_runner.cc index 0a7aea22..b6e43dd5 100644 --- a/src/ge/ge_runtime/model_runner.cc +++ b/src/ge/ge_runtime/model_runner.cc @@ -49,6 +49,15 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint return true; } +bool ModelRunner::LoadModelComplete(uint32_t model_id) { + auto model_iter = runtime_models_.find(model_id); + if (model_iter == runtime_models_.end()) { + GELOGE(PARAM_INVALID, "Model id %u not found.", model_id); + return false; + } + return model_iter->second->LoadComplete(); +} + const std::vector &ModelRunner::GetTaskIdList(uint32_t model_id) const { auto model_iter = runtime_models_.find(model_id); if (model_iter == runtime_models_.end()) { @@ -71,6 +80,17 @@ const std::vector &ModelRunner::GetStreamIdList(uint32_t model_id) con return model_iter->second->GetStreamIdList(); } +const std::map> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { + auto model_iter = runtime_models_.find(model_id); + if (model_iter == runtime_models_.end()) { + GELOGW("Model id %u not found.", model_id); + static const std::map> empty_ret; + return empty_ret; + } + + return model_iter->second->GetRuntimeInfoMap(); +} + bool ModelRunner::UnloadModel(uint32_t model_id) { auto iter = runtime_models_.find(model_id); if (iter != runtime_models_.end()) { diff --git a/src/ge/ge_runtime/runtime_model.cc b/src/ge/ge_runtime/runtime_model.cc index 87c755b5..f71b6ec8 100644 --- a/src/ge/ge_runtime/runtime_model.cc +++ b/src/ge/ge_runtime/runtime_model.cc @@ -28,7 +28,6 @@ namespace ge { namespace model_runner { - RuntimeModel::~RuntimeModel() { GELOGI("RuntimeModel destructor start"); @@ -221,21 +220,40 @@ bool RuntimeModel::LoadTask() { } task_id_list_.push_back(task_id); stream_id_list_.push_back(stream_id); + if (task->Args() != nullptr) { + std::shared_ptr runtime_tuple = nullptr; + GE_MAKE_SHARED(runtime_tuple = std::make_shared(task_id, stream_id, task->Args()), return false); + auto 
emplace_ret = runtime_info_map_.emplace(task->task_name(), runtime_tuple); + if (!emplace_ret.second) { + GELOGW("Task name exist:%s", task->task_name().c_str()); + } + } } if (task_list_.empty()) { GELOGE(FAILED, "Task list is empty"); return false; } - GELOGI("Distribute task succ."); - auto rt_ret = rtModelLoadComplete(rt_model_handle_); + GELOGI("LoadTask succ."); + return true; +} + +bool RuntimeModel::LoadComplete() { // Appends the id pair reported by rtModelGetTaskId to the id lists, then calls rtModelLoadComplete; returns false if either runtime call fails. + uint32_t task_id = 0; + uint32_t stream_id = 0; + auto rt_ret = rtModelGetTaskId(rt_model_handle_, &task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rtModelGetTaskId failed, ret:0x%X", rt_ret); + return false; // bool function: returning the RT_FAILED error code would convert to true (success) if it is non-zero + } + task_id_list_.push_back(task_id); + stream_id_list_.push_back(stream_id); + rt_ret = rtModelLoadComplete(rt_model_handle_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api rtModelLoadComplete failed, ret: 0x%X.", rt_ret); return false; } - - GELOGI("LoadTask succ."); return true; } bool RuntimeModel::Load(uint32_t device_id, uint64_t session_id, std::shared_ptr &davinci_model) { diff --git a/src/ge/ge_runtime/runtime_model.h b/src/ge/ge_runtime/runtime_model.h index b89aed1c..67535296 100644 --- a/src/ge/ge_runtime/runtime_model.h +++ b/src/ge/ge_runtime/runtime_model.h @@ -27,7 +27,7 @@ namespace ge { namespace model_runner { - +using RuntimeInfo = std::tuple; class Task; class RuntimeModel { public: @@ -35,8 +35,10 @@ class RuntimeModel { ~RuntimeModel(); bool Load(uint32_t device_id, uint64_t session_id, std::shared_ptr &davinci_model); + bool LoadComplete(); const std::vector &GetTaskIdList() const; const std::vector &GetStreamIdList() const; + const std::map> &GetRuntimeInfoMap() const { return runtime_info_map_; } bool Run(); bool CopyInputData(const InputData &input_data); bool GetInputOutputDescInfo(bool zero_copy, std::vector *input_desc, @@ -79,6 +81,7 @@ class RuntimeModel { std::vector task_id_list_{}; std::vector stream_id_list_{}; + std::map> runtime_info_map_; };
} // namespace model_runner diff --git a/src/ge/ge_runtime/task/aicpu_task.cc b/src/ge/ge_runtime/task/aicpu_task.cc index 4cb71866..9b126ec0 100644 --- a/src/ge/ge_runtime/task/aicpu_task.cc +++ b/src/ge/ge_runtime/task/aicpu_task.cc @@ -85,11 +85,15 @@ bool AicpuTask::Distribute() { return false; } - GELOGI("Distribute AicpuTask start, args_size = %u, io_addrs_num = %u, so_name = %s, kernel_name = %s.", args_size, - io_addrs_num, task_info_->so_name().data(), task_info_->kernel_name().data()); - rt_ret = rtCpuKernelLaunch(reinterpret_cast(task_info_->so_name().data()), - reinterpret_cast(task_info_->kernel_name().data()), 1, args_, args_size, - nullptr, stream_); + input_output_addr_ = reinterpret_cast(reinterpret_cast(args_) + io_addr_offset); + + auto dump_flag = task_info_->dump_flag() ? RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT; + GELOGI( + "Distribute AicpuTask start, args_size = %u, io_addrs_num = %u, so_name = %s, kernel_name = %s, dump_flag = %d.", + args_size, io_addrs_num, task_info_->so_name().data(), task_info_->kernel_name().data(), dump_flag); + rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(task_info_->so_name().data()), + reinterpret_cast(task_info_->kernel_name().data()), 1, args_, + args_size, nullptr, stream_, dump_flag); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return false; diff --git a/src/ge/ge_runtime/task/aicpu_task.h b/src/ge/ge_runtime/task/aicpu_task.h index f5cdc617..cc21af8a 100644 --- a/src/ge/ge_runtime/task/aicpu_task.h +++ b/src/ge/ge_runtime/task/aicpu_task.h @@ -18,6 +18,7 @@ #define GE_GE_RUNTIME_TASK_AICPU_TASK_H_ #include +#include #include "ge_runtime/task/task.h" namespace ge { @@ -30,12 +31,17 @@ class AicpuTask : public TaskRepeater { bool Distribute() override; + void *Args() override { return input_output_addr_; } + + std::string task_name() const override { return task_info_->op_name(); } + private: static void ReleaseRtMem(void **ptr) noexcept; std::shared_ptr 
task_info_; void *stream_; void *args_; + void *input_output_addr_ = nullptr; /* assigned in Distribute(); null until then so Args() never returns an indeterminate pointer */ }; } // namespace model_runner } // namespace ge diff --git a/src/ge/ge_runtime/task/task.h b/src/ge/ge_runtime/task/task.h index 7c748a7d..6c4df248 100644 --- a/src/ge/ge_runtime/task/task.h +++ b/src/ge/ge_runtime/task/task.h @@ -18,7 +18,9 @@ #define GE_GE_RUNTIME_TASK_TASK_H_ #include +#include #include +#include #include "runtime/rt_model.h" #include "ge_runtime/model_context.h" #include "ge_runtime/task_info.h" @@ -32,6 +34,10 @@ class Task { virtual ~Task() {} virtual bool Distribute() = 0; + + virtual void *Args() { return nullptr; } + + virtual std::string task_name() const { return ""; } }; template diff --git a/src/ge/ge_runtime/task/tbe_task.cc b/src/ge/ge_runtime/task/tbe_task.cc index 8a3c36a4..e7025ae8 100644 --- a/src/ge/ge_runtime/task/tbe_task.cc +++ b/src/ge/ge_runtime/task/tbe_task.cc @@ -95,15 +95,14 @@ bool TbeTask::Distribute() { return false; } - GELOGI("InitTbeTask end."); GELOGI("DistributeTbeTask start."); - rt_ret = rtKernelLaunch(stub_func_, task_info_->block_dim(), args_, args_size, nullptr, stream_); + auto dump_flag = task_info_->dump_flag() ? 
RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT; + rt_ret = rtKernelLaunchWithFlag(stub_func_, task_info_->block_dim(), args_, args_size, nullptr, stream_, dump_flag); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api rtKernelLaunch failed, ret: 0x%X", rt_ret); return false; } - - GELOGI("DistributeTbeTask end."); + GELOGI("[DataDump] task name:%s, dump_flag:%d", task_info_->op_name().c_str(), dump_flag); return true; } diff --git a/src/ge/ge_runtime/task/tbe_task.h b/src/ge/ge_runtime/task/tbe_task.h index 994ba5e2..a8ce6268 100644 --- a/src/ge/ge_runtime/task/tbe_task.h +++ b/src/ge/ge_runtime/task/tbe_task.h @@ -30,6 +30,10 @@ class TbeTask : public TaskRepeater { bool Distribute() override; + void *Args() override { return args_; } + + std::string task_name() const override { return task_info_->op_name(); } + private: std::shared_ptr task_info_; void *stream_;