@@ -649,7 +649,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
for (const ge::NodePtr &node : compute_graph->GetDirectNode()) { | |||
auto op_desc = node->GetOpDesc(); | |||
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||
GetFixedAddrAttr(op_desc); | |||
GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue); | |||
GE_IF_BOOL_EXEC(IsBroadCastOpData(node), | |||
(void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); | |||
@@ -838,7 +837,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
} | |||
continue; | |||
} | |||
// for dynamic shape with control flow | |||
SetLabelForDynamic(node); | |||
if (IsNoTaskAndDumpNeeded(op_desc)) { | |||
GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str()); | |||
const RuntimeParam &rts_param = GetRuntimeParam(); | |||
@@ -912,6 +912,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr | |||
return SUCCESS; | |||
} | |||
void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||
if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) { | |||
for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
if (peer_out_data_anchor != nullptr) { | |||
string tensor_name = node->GetName(); | |||
auto peer_node = peer_out_data_anchor->GetOwnerNode(); | |||
(void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); | |||
(void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0); | |||
tensor_name_to_peer_output_index_[tensor_name] = 0; | |||
} | |||
} | |||
} | |||
} | |||
/// @ingroup ge | |||
/// @brief Data Op Initialize. | |||
/// @param [in] NodePtr: Data Op. | |||
@@ -3948,15 +3963,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) { | |||
} | |||
} | |||
void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) { | |||
if (op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX)) { | |||
string tensor_name; | |||
(void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); | |||
int64_t index = -1; | |||
(void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, index); | |||
if (index >= 0) { | |||
tensor_name_to_peer_output_index_[tensor_name] = index; | |||
} | |||
} | |||
} | |||
} // namespace ge |
@@ -838,7 +838,7 @@ class DavinciModel { | |||
std::vector<ge::OutputTensorInfo> &outputs); | |||
void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); | |||
void GetFixedAddrAttr(const OpDescPtr &op_desc); | |||
void SetLabelForDynamic(const NodePtr &node); | |||
bool is_model_has_inited_; | |||
uint32_t model_id_; | |||
@@ -144,7 +144,7 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def, | |||
GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); | |||
return FAILED; | |||
} | |||
string input_tensor_name = op_desc->GetInputNameByIndex(0); | |||
string input_tensor_name = op_desc->GetName(); | |||
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name); | |||
auto tensor_desc = op_desc->GetInputDesc(0); | |||
int64_t tensor_size = 0; | |||
@@ -35,6 +35,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
kind_ = memcpy_async_.kind(); | |||
dst_max_ = memcpy_async_.dst_max(); | |||
OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index()); | |||
op_desc_ = op_desc; | |||
if (op_desc == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index()); | |||
return INTERNAL_ERROR; | |||
@@ -45,7 +46,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *)); | |||
// for zero copy | |||
kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE; | |||
GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_); | |||
GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.", | |||
op_desc->GetName().c_str(), src_, dst_, args_offset_); | |||
return SUCCESS; | |||
} | |||
@@ -93,12 +95,22 @@ Status MemcpyAsyncTaskInfo::Distribute() { | |||
} | |||
/// @brief Reserve arg space for the memcpy-async task and, when the op is
///        bound to a dynamic-shape fixed address, account for that output's
///        size in the model's fixed-addr pool.
/// @param [in] task_def: task definition carrying the op index.
/// @param [in] davinci_model: owning model; cached in davinci_model_.
/// @return SUCCESS on success, INTERNAL_ERROR when the op index is invalid.
Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  // Cache the model BEFORE any use: the previous code called
  // davinci_model_->GetOpByIndex(...) while davinci_model_ was still
  // unassigned (it is only set in Init, which may run after CalculateArgs),
  // dereferencing a null/stale pointer.
  davinci_model_ = davinci_model;
  OpDescPtr op_desc = davinci_model_->GetOpByIndex(task_def.memcpy_async().op_index());
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", task_def.memcpy_async().op_index());
    return INTERNAL_ERROR;
  }

  // the num of src and dst size is 2
  uint32_t args_size = sizeof(void *) * 2;
  args_offset_ = davinci_model->GetTotalArgsSize();
  davinci_model->SetTotalArgsSize(args_size);
  GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_);

  string peer_input_name;
  if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
    // The op's tagged output must land at a fixed address; reserve space for
    // it keyed by the peer input (LabelSwitchByIndex node) name.
    uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
    fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
    auto tensor_desc = op_desc->GetOutputDesc(output_index);
    int64_t tensor_size = 0;
    GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
    davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
  }
  return SUCCESS;
}
@@ -117,8 +129,12 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { | |||
vector<void *> io_addrs; | |||
io_addrs.emplace_back(reinterpret_cast<void *>(src_)); | |||
io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | |||
if (op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||
io_addrs.emplace_back(fixed_addr); | |||
} else { | |||
io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | |||
} | |||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||
GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); | |||
@@ -44,6 +44,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||
uint8_t *src_; | |||
uint64_t count_; | |||
uint32_t kind_; | |||
OpDescPtr op_desc_; | |||
int64_t fixed_addr_offset_; | |||
DavinciModel *davinci_model_ = nullptr; | |||
uint32_t args_offset_ = 0; | |||
domi::MemcpyAsyncDef memcpy_async_; | |||
@@ -149,7 +149,8 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node | |||
std::string op_type; | |||
bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || | |||
IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || | |||
((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)); | |||
((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || | |||
(NodeUtils::IsDynamicShape(node) && (kWhileOpTypes.count(in_node->GetType()) != 0)); | |||
if (insert_flag) { | |||
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); | |||
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; | |||
@@ -212,6 +213,19 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP | |||
return SUCCESS; | |||
} | |||
// insert identity between data and labelswitch in while cond subgraph | |||
if (NodeUtils::IsDynamicShape(node)) { | |||
ComputeGraphPtr while_cond = NodeUtils::GetSubgraph(*node, 0); | |||
GE_CHECK_NOTNULL(while_cond); | |||
std::vector<NodePtr> cond_data_nodes; | |||
for (const auto &n : while_cond->GetDirectNode()) { | |||
if (n->GetType() == DATA) { | |||
cond_data_nodes.emplace_back(n); | |||
} | |||
} | |||
GE_CHK_STATUS_RET(InsertInputMemcpy(while_cond, cond_data_nodes), "InsertInputMemcpy failed."); | |||
} | |||
std::vector<NodePtr> data_nodes; | |||
std::set<uint32_t> bypass_index; | |||
NodePtr output_node = nullptr; | |||
@@ -31,7 +31,6 @@ | |||
#include "task/aicpu_task_builder.h" | |||
#include "task/aicpu_kernel_task_builder.h" | |||
#include "task/tbe_task_builder.h" | |||
#include "graph/load/new_model_manager/model_manager.h" | |||
static std::atomic<std::uint64_t> aicpu_sessionid(0); | |||
@@ -278,7 +277,6 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
} | |||
} | |||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||
return SUCCESS; | |||
} | |||
@@ -450,8 +448,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
} | |||
} | |||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||
return SUCCESS; | |||
} | |||
@@ -63,6 +63,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||
task.is_custom_ = true; | |||
task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; | |||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); | |||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||
} | |||
task.num_inputs_ = op_desc_->GetInputsSize(); | |||