Browse Source

fix dynamic shape with while

tags/v1.1.0
weiyang 3 years ago
parent
commit
5e8f1579e2
8 changed files with 56 additions and 23 deletions
  1. +17
    -13
      ge/graph/load/new_model_manager/davinci_model.cc
  2. +1
    -1
      ge/graph/load/new_model_manager/davinci_model.h
  3. +1
    -1
      ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
  4. +19
    -3
      ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
  5. +2
    -0
      ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
  6. +15
    -1
      ge/graph/passes/subgraph_pass.cc
  7. +0
    -4
      ge/single_op/single_op_model.cc
  8. +1
    -0
      ge/single_op/task/aicpu_kernel_task_builder.cc

+ 17
- 13
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -649,7 +649,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
for (const ge::NodePtr &node : compute_graph->GetDirectNode()) {
auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
GetFixedAddrAttr(op_desc);
GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue);
GE_IF_BOOL_EXEC(IsBroadCastOpData(node),
(void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore"););
@@ -838,7 +837,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
}
continue;
}

// for dynamic shape with control flow
SetLabelForDynamic(node);
if (IsNoTaskAndDumpNeeded(op_desc)) {
GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str());
const RuntimeParam &rts_param = GetRuntimeParam();
@@ -912,6 +912,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr
return SUCCESS;
}

void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) {
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
if (peer_out_data_anchor != nullptr) {
string tensor_name = node->GetName();
auto peer_node = peer_out_data_anchor->GetOwnerNode();
(void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
(void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0);
tensor_name_to_peer_output_index_[tensor_name] = 0;
}
}
}
}

/// @ingroup ge
/// @brief Data Op Initialize.
/// @param [in] NodePtr: Data Op.
@@ -3948,15 +3963,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
}
}

void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) {
if (op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX)) {
string tensor_name;
(void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
int64_t index = -1;
(void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, index);
if (index >= 0) {
tensor_name_to_peer_output_index_[tensor_name] = index;
}
}
}
} // namespace ge

+ 1
- 1
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -838,7 +838,7 @@ class DavinciModel {
std::vector<ge::OutputTensorInfo> &outputs);

void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);
void GetFixedAddrAttr(const OpDescPtr &op_desc);
void SetLabelForDynamic(const NodePtr &node);

bool is_model_has_inited_;
uint32_t model_id_;


+ 1
- 1
ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc View File

@@ -144,7 +144,7 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def,
GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
return FAILED;
}
string input_tensor_name = op_desc->GetInputNameByIndex(0);
string input_tensor_name = op_desc->GetName();
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name);
auto tensor_desc = op_desc->GetInputDesc(0);
int64_t tensor_size = 0;


+ 19
- 3
ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc View File

@@ -35,6 +35,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
kind_ = memcpy_async_.kind();
dst_max_ = memcpy_async_.dst_max();
OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index());
op_desc_ = op_desc;
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index());
return INTERNAL_ERROR;
@@ -45,7 +46,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *));
// for zero copy
kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE;
GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_);
GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.",
op_desc->GetName().c_str(), src_, dst_, args_offset_);
return SUCCESS;
}

@@ -93,12 +95,22 @@ Status MemcpyAsyncTaskInfo::Distribute() {
}

/// @ingroup ge
/// @brief Reserve this task's args slot (source and destination address) in the model and,
///        when the op carries a non-empty ATTR_DYNAMIC_SHAPE_FIXED_ADDR, reserve its
///        fixed-address slot as well.
/// @param [in] task_def: task definition providing the memcpy_async op index.
/// @param [in] davinci_model: model that owns the op and the args / fixed-address accounting.
/// @return SUCCESS on success; INTERNAL_ERROR when davinci_model is null or no op is found
///         for the op index.
Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  if (davinci_model == nullptr) {
    GELOGE(INTERNAL_ERROR, "davinci_model is null");
    return INTERNAL_ERROR;
  }

  // Look the op up through the parameter. The member davinci_model_ is only assigned below
  // and is still its default nullptr if this function runs before Init().
  const uint32_t op_index = task_def.memcpy_async().op_index();
  OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index);
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", op_index);
    return INTERNAL_ERROR;
  }

  // the num of src and dst size is 2
  uint32_t args_size = sizeof(void *) * 2;
  args_offset_ = davinci_model->GetTotalArgsSize();
  davinci_model->SetTotalArgsSize(args_size);
  davinci_model_ = davinci_model;
  GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_);

  string peer_input_name;
  if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
    uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
    // Remember the offset of this op's slot before the fixed-address total is updated.
    fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
    auto tensor_desc = op_desc->GetOutputDesc(output_index);
    int64_t tensor_size = 0;
    GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
    davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
  }
  return SUCCESS;
}

@@ -117,8 +129,12 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() {

vector<void *> io_addrs;
io_addrs.emplace_back(reinterpret_cast<void *>(src_));
io_addrs.emplace_back(reinterpret_cast<void *>(dst_));

if (op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
} else {
io_addrs.emplace_back(reinterpret_cast<void *>(dst_));
}
davinci_model_->SetTotalIOAddrs(io_addrs);

GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success.");


+ 2
- 0
ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h View File

@@ -44,6 +44,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo {
uint8_t *src_;
uint64_t count_;
uint32_t kind_;
OpDescPtr op_desc_;
int64_t fixed_addr_offset_;
DavinciModel *davinci_model_ = nullptr;
uint32_t args_offset_ = 0;
domi::MemcpyAsyncDef memcpy_async_;


+ 15
- 1
ge/graph/passes/subgraph_pass.cc View File

@@ -149,7 +149,8 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node
std::string op_type;
bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) ||
IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) ||
((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0));
((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) ||
(NodeUtils::IsDynamicShape(node) && (kWhileOpTypes.count(in_node->GetType()) != 0));
if (insert_flag) {
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
@@ -212,6 +213,19 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP
return SUCCESS;
}

// insert identity between data and labelswitch in while cond subgraph
if (NodeUtils::IsDynamicShape(node)) {
ComputeGraphPtr while_cond = NodeUtils::GetSubgraph(*node, 0);
GE_CHECK_NOTNULL(while_cond);
std::vector<NodePtr> cond_data_nodes;
for (const auto &n : while_cond->GetDirectNode()) {
if (n->GetType() == DATA) {
cond_data_nodes.emplace_back(n);
}
}
GE_CHK_STATUS_RET(InsertInputMemcpy(while_cond, cond_data_nodes), "InsertInputMemcpy failed.");
}

std::vector<NodePtr> data_nodes;
std::set<uint32_t> bypass_index;
NodePtr output_node = nullptr;


+ 0
- 4
ge/single_op/single_op_model.cc View File

@@ -31,7 +31,6 @@
#include "task/aicpu_task_builder.h"
#include "task/aicpu_kernel_task_builder.h"
#include "task/tbe_task_builder.h"
#include "graph/load/new_model_manager/model_manager.h"

static std::atomic<std::uint64_t> aicpu_sessionid(0);

@@ -278,7 +277,6 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
}
}
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
return SUCCESS;
}

@@ -450,8 +448,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GELOGD("Skip task type: %d", static_cast<int>(task_type));
}
}
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");

return SUCCESS;
}



+ 1
- 0
ge/single_op/task/aicpu_kernel_task_builder.cc View File

@@ -63,6 +63,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) {
task.is_custom_ = true;
task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
}

task.num_inputs_ = op_desc_->GetInputsSize();


Loading…
Cancel
Save