@@ -830,6 +830,13 @@ Status DataDumper::UnloadDumpInfo() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/// Releases the dumper state that the class declaration marks as
/// "release after DavinciModel::Init": the compute graph handle and the
/// input-mapping, ref-info and op-list containers. No other member is touched.
void DataDumper::DumpShrink() {
  // Give up this object's reference to the compute graph.
  compute_graph_.reset();

  // Empty the bookkeeping containers; order is kept as originally written.
  input_map_.clear();
  ref_info_.clear();
  op_list_.clear();
}
void DataDumper::PrintCheckLog(string &dump_list_key) { | void DataDumper::PrintCheckLog(string &dump_list_key) { | ||||
std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | ||||
if (model_list.empty()) { | if (model_list.empty()) { | ||||
@@ -83,6 +83,8 @@ class DataDumper { | |||||
Status UnloadDumpInfo(); | Status UnloadDumpInfo(); | ||||
void DumpShrink(); | |||||
void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | ||||
const DumpProperties &GetDumpProperties() const { return dump_properties_; } | const DumpProperties &GetDumpProperties() const { return dump_properties_; } | ||||
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; | bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const; | ||||
@@ -112,18 +114,18 @@ class DataDumper { | |||||
struct InnerInputMapping; | struct InnerInputMapping; | ||||
std::vector<OpDescInfo> op_desc_info_; | std::vector<OpDescInfo> op_desc_info_; | ||||
std::vector<InnerDumpInfo> op_list_; | |||||
std::vector<InnerDumpInfo> op_list_; // release after DavinciModel::Init | |||||
uint32_t end_graph_task_id_ = 0; | uint32_t end_graph_task_id_ = 0; | ||||
uint32_t end_graph_stream_id_ = 0; | uint32_t end_graph_stream_id_ = 0; | ||||
bool is_end_graph_ = false; | bool is_end_graph_ = false; | ||||
std::multimap<std::string, InnerInputMapping> input_map_; | |||||
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init | |||||
bool load_flag_; | bool load_flag_; | ||||
uint32_t device_id_; | uint32_t device_id_; | ||||
uintptr_t global_step_; | uintptr_t global_step_; | ||||
uintptr_t loop_per_iter_; | uintptr_t loop_per_iter_; | ||||
uintptr_t loop_cond_; | uintptr_t loop_cond_; | ||||
ComputeGraphPtr compute_graph_; | |||||
std::map<OpDescPtr, void *> ref_info_; | |||||
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init | |||||
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | |||||
void *l1_fusion_addr_ = nullptr; | void *l1_fusion_addr_ = nullptr; | ||||
@@ -150,14 +150,7 @@ DavinciModel::~DavinciModel() { | |||||
GELOGW("UnloadDumpInfo failed, ret: %u.", ret); | GELOGW("UnloadDumpInfo failed, ret: %u.", ret); | ||||
} | } | ||||
for (const auto &op_and_addr : saved_task_addrs_) { | |||||
auto addr = op_and_addr.second; | |||||
if (addr != nullptr) { | |||||
GE_CHK_RT(rtFree(addr)); | |||||
} | |||||
addr = nullptr; | |||||
} | |||||
saved_task_addrs_.clear(); | |||||
ClearTaskAddrs(); | |||||
GE_CHK_STATUS(ModelRunStop()); | GE_CHK_STATUS(ModelRunStop()); | ||||
@@ -221,6 +214,17 @@ DavinciModel::~DavinciModel() { | |||||
} | } | ||||
} | } | ||||
void DavinciModel::ClearTaskAddrs() { | |||||
for (const auto &op_and_addr : saved_task_addrs_) { | |||||
auto addr = op_and_addr.second; | |||||
if (addr != nullptr) { | |||||
GE_CHK_RT(rtFree(addr)); | |||||
} | |||||
addr = nullptr; | |||||
} | |||||
saved_task_addrs_.clear(); | |||||
} | |||||
void DavinciModel::UnbindHcomStream() { | void DavinciModel::UnbindHcomStream() { | ||||
if (!all_hccl_stream_list_.empty()) { | if (!all_hccl_stream_list_.empty()) { | ||||
for (size_t i = 0; i < all_hccl_stream_list_.size(); i++) { | for (size_t i = 0; i < all_hccl_stream_list_.size(); i++) { | ||||
@@ -263,7 +267,10 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) { | |||||
/// | /// | ||||
void DavinciModel::Shrink() { | void DavinciModel::Shrink() { | ||||
skt_info_ = {0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, -1, 0, nullptr}; | skt_info_ = {0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, -1, 0, nullptr}; | ||||
DumperShrink(); | |||||
ge_model_.reset(); // delete object. | ge_model_.reset(); // delete object. | ||||
op_list_.clear(); | |||||
ClearTaskAddrs(); | |||||
} | } | ||||
Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) { | Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) { | ||||
@@ -738,7 +745,6 @@ Status DavinciModel::ReportProfilingData() { | |||||
} | } | ||||
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info); | ||||
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed."); | ||||
op_list_.clear(); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -963,7 +969,9 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||||
} | } | ||||
data_by_index[data_index] = op_desc; | data_by_index[data_index] = op_desc; | ||||
data_op_list_.push_back(op_desc); | |||||
auto data_op = AttrUtils::CopyOpDesc(op_desc); | |||||
GE_CHECK_NOTNULL(data_op); | |||||
data_op_list_.push_back(data_op); | |||||
if (known_node_) { | if (known_node_) { | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1019,7 +1027,9 @@ Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
data_op_list_.clear(); | data_op_list_.clear(); | ||||
for (auto &item : data_by_index) { | for (auto &item : data_by_index) { | ||||
data_op_list_.emplace_back(item.second); | |||||
auto data_op = AttrUtils::CopyOpDesc(item.second); | |||||
GE_CHECK_NOTNULL(data_op); | |||||
data_op_list_.emplace_back(data_op); | |||||
auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | ||||
GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | ||||
input_addrs_list_.emplace_back(output_addrs); | input_addrs_list_.emplace_back(output_addrs); | ||||
@@ -481,6 +481,10 @@ class DavinciModel { | |||||
data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | ||||
} | } | ||||
// Forwards to the owned DataDumper so it can release its own bookkeeping.
void DumperShrink() { data_dumper_.DumpShrink(); }
void SetEndGraphId(uint32_t task_id, uint32_t stream_id); | void SetEndGraphId(uint32_t task_id, uint32_t stream_id); | ||||
DavinciModel &operator=(const DavinciModel &model) = delete; | DavinciModel &operator=(const DavinciModel &model) = delete; | ||||
@@ -644,6 +648,8 @@ class DavinciModel { | |||||
void ReleaseTask(); | void ReleaseTask(); | ||||
void ClearTaskAddrs(); | |||||
void UnbindTaskSinkStream(); | void UnbindTaskSinkStream(); | ||||
bool IsAicpuKernelConnectSpecifiedLayer(); | bool IsAicpuKernelConnectSpecifiedLayer(); | ||||
@@ -875,12 +881,12 @@ class DavinciModel { | |||||
string om_name_; | string om_name_; | ||||
uint32_t version_; | uint32_t version_; | ||||
GeModelPtr ge_model_; | |||||
GeModelPtr ge_model_; // release after DavinciModel::Init | |||||
bool need_destroy_aicpu_kernel_{false}; | bool need_destroy_aicpu_kernel_{false}; | ||||
vector<string> out_node_name_; | vector<string> out_node_name_; | ||||
map<uint32_t, OpDescPtr> op_list_; | |||||
map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | |||||
// data op_desc | // data op_desc | ||||
vector<OpDescPtr> data_op_list_; | vector<OpDescPtr> data_op_list_; | ||||
@@ -975,7 +981,7 @@ class DavinciModel { | |||||
DataDumper data_dumper_; | DataDumper data_dumper_; | ||||
uint64_t iterator_count_; | uint64_t iterator_count_; | ||||
bool is_l1_fusion_enable_; | bool is_l1_fusion_enable_; | ||||
map<OpDescPtr, void *> saved_task_addrs_; | |||||
map<OpDescPtr, void *> saved_task_addrs_; // release after DavinciModel::Init | |||||
void *l1_fusion_addr_ = nullptr; | void *l1_fusion_addr_ = nullptr; | ||||
bool known_node_ = false; | bool known_node_ = false; | ||||
@@ -254,6 +254,17 @@ TEST_F(UtestModelManagerDavinciModel, eventlist_success) { | |||||
delete model; | delete model; | ||||
} | } | ||||
// test Shrink: addresses recorded in saved_task_addrs_ must be gone after Shrink().
TEST_F(UtestModelManagerDavinciModel, shrink_success) {
  DavinciModel model(0, g_label_call_back);
  OpDescPtr op_desc_ptr = make_shared<OpDesc>("Cast", "Cast");

  // The allocation result is deliberately not asserted: the check below is on
  // the map being emptied, which holds for a null entry as well.
  void *addr = nullptr;
  (void)rtMalloc(&addr, 128, RT_MEMORY_HBM);
  model.saved_task_addrs_.emplace(op_desc_ptr, addr);

  model.Shrink();

  // std::map exposes empty(), not isEmpty().
  EXPECT_TRUE(model.saved_task_addrs_.empty());
}
// test rtMalloc | // test rtMalloc | ||||
TEST_F(UtestModelManagerDavinciModel, failed_reset_device) { | TEST_F(UtestModelManagerDavinciModel, failed_reset_device) { | ||||
DavinciModel model(0, g_label_call_back); | DavinciModel model(0, g_label_call_back); | ||||