
Revert 'Pull Request !499 : inference supports dynamic shape'

tags/v1.2.0
王涛 (Gitee) · 3 years ago
commit 87704d8512
29 changed files with 60 additions and 984 deletions
  1. +0 -4 ge/common/ge/op_tiling_manager.cc
  2. +0 -1 ge/common/ge/op_tiling_manager.h
  3. +6 -88 ge/executor/CMakeLists.txt
  4. +4 -69 ge/executor/ge_executor.cc
  5. +1 -83 ge/executor/module.mk
  6. +1 -1 ge/ge_local_engine/CMakeLists.txt
  7. +1 -1 ge/ge_local_engine/engine/host_cpu_engine.h
  8. +0 -51 ge/graph/build/graph_builder.cc
  9. +2 -5 ge/graph/load/graph_loader.cc
  10. +1 -2 ge/graph/load/graph_loader.h
  11. +34 -54 ge/graph/load/new_model_manager/davinci_model.cc
  12. +2 -5 ge/graph/load/new_model_manager/davinci_model.h
  13. +1 -47 ge/graph/load/new_model_manager/model_manager.cc
  14. +1 -4 ge/graph/load/new_model_manager/model_manager.h
  15. +1 -46 ge/graph/partition/dynamic_shape_partition.cc
  16. +0 -1 ge/graph/partition/dynamic_shape_partition.h
  17. +1 -1 ge/graph/passes/transop_breadth_fusion_pass.cc
  18. +3 -3 ge/host_cpu_engine/CMakeLists.txt
  19. +0 -38 ge/hybrid/executor/hybrid_model_async_executor.cc
  20. +0 -5 ge/hybrid/executor/hybrid_model_async_executor.h
  21. +0 -79 ge/hybrid/hybrid_davinci_model.cc
  22. +0 -21 ge/hybrid/hybrid_davinci_model.h
  23. +0 -32 ge/hybrid/hybrid_davinci_model_stub.cc
  24. +1 -214 ge/hybrid/model/hybrid_model.cc
  25. +0 -26 ge/hybrid/model/hybrid_model.h
  26. +0 -30 ge/hybrid/model/hybrid_model_builder.cc
  27. +0 -56 ge/hybrid/node_executor/aicore/aicore_op_task.cc
  28. +0 -1 ge/hybrid/node_executor/aicore/aicore_op_task.h
  29. +0 -16 inc/framework/executor/ge_executor.h

+0 -4 ge/common/ge/op_tiling_manager.cc

@@ -88,8 +88,4 @@ void OpTilingManager::LoadSo() {
}
}

OpTilingManager &OpTilingManager::GetInstance() {
static OpTilingManager instance;
return instance;
}
} // namespace ge

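For reference, the accessor removed above (and its declaration, removed from the header below) is the function-local-static singleton pattern; a minimal self-contained sketch, reusing only the class and method names from the diff:

#include <iostream>

// Sketch of the "Meyers singleton" that the revert removes from
// OpTilingManager: C++11 guarantees the function-local static is
// initialized exactly once, even if first called from several threads.
class OpTilingManager {
 public:
  static OpTilingManager &GetInstance() {
    static OpTilingManager instance;  // constructed on first use
    return instance;
  }
  void LoadSo() { std::cout << "load op tiling libraries\n"; }  // stand-in body

 private:
  OpTilingManager() = default;  // private here to force use of GetInstance();
                                // the real header keeps it public
};

int main() {
  OpTilingManager::GetInstance().LoadSo();
  return 0;
}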
+0 -1 ge/common/ge/op_tiling_manager.h

@@ -25,7 +25,6 @@ using SoToHandleMap = std::map<std::string, void *>;
class OpTilingManager {
public:
OpTilingManager() = default;
static OpTilingManager &GetInstance();
~OpTilingManager();
void LoadSo();



+6 -88 ge/executor/CMakeLists.txt

@@ -72,89 +72,7 @@ set(SRC_LIST
"../single_op/task/tbe_task_builder.cc"
"../single_op/task/aicpu_task_builder.cc"
"../single_op/task/aicpu_kernel_task_builder.cc"
"../hybrid/common/tensor_value.cc"
"../hybrid/common/npu_memory_allocator.cc"
"../hybrid/executor/rt_callback_manager.cc"
"../hybrid/executor/node_state.cc"
"../hybrid/executor/node_done_manager.cc"
"../hybrid/executor/hybrid_profiler.cc"
"../hybrid/executor/hybrid_model_executor.cc"
"../hybrid/executor/hybrid_model_async_executor.cc"
"../hybrid/executor/hybrid_execution_context.cc"
"../hybrid/executor/subgraph_context.cc"
"../hybrid/executor/subgraph_executor.cc"
"../hybrid/executor/worker/task_compile_engine.cc"
"../hybrid/executor/worker/shape_inference_engine.cc"
"../hybrid/executor/worker/execution_engine.cc"
"../hybrid/model/hybrid_model.cc"
"../hybrid/model/hybrid_model_builder.cc"
"../hybrid/model/node_item.cc"
"../hybrid/model/graph_item.cc"
"../hybrid/node_executor/aicore/aicore_node_executor.cc"
"../hybrid/node_executor/aicore/aicore_op_task.cc"
"../hybrid/node_executor/aicore/aicore_task_builder.cc"
"../hybrid/node_executor/aicpu/aicpu_node_executor.cc"
"../hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"../hybrid/node_executor/ge_local/ge_local_node_executor.cc"
"../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
"../hybrid/node_executor/host_cpu/kernel_factory.cc"
"../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
"../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
"../hybrid/node_executor/controlop/control_op_executor.cc"
"../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"../hybrid/node_executor/rts/rts_node_executor.cc"
"../hybrid/node_executor/node_executor.cc"
"../hybrid/node_executor/task_context.cc"
"../hybrid/hybrid_davinci_model.cc"
"../ge_local_engine/engine/host_cpu_engine.cc"
"../graph/common/omg_util.cc"
"../graph/manager/host_mem_manager.cc"
"../graph/build/memory/var_mem_assign_util.cc"
"../host_kernels/transpose_kernel.cc"
"../host_kernels/add_kernel.cc"
"../host_kernels/broadcast_args_kernel.cc"
"../host_kernels/broadcast_gradient_args_kernel.cc"
"../host_kernels/cast_kernel.cc"
"../host_kernels/concat_offset_kernel.cc"
"../host_kernels/concat_v2_kernel.cc"
"../host_kernels/dynamic_stitch_kernel.cc"
"../host_kernels/identity_kernel.cc"
"../host_kernels/empty_kernel.cc"
"../host_kernels/expanddims_kernel.cc"
"../host_kernels/fill_kernel.cc"
"../host_kernels/floordiv_kernel.cc"
"../host_kernels/floormod_kernel.cc"
"../host_kernels/gather_v2_kernel.cc"
"../host_kernels/greater_kernel.cc"
"../host_kernels/kernel_utils.cc"
"../host_kernels/maximum_kernel.cc"
"../host_kernels/mul_kernel.cc"
"../host_kernels/pack_kernel.cc"
"../host_kernels/permute_kernel.cc"
"../host_kernels/range_kernel.cc"
"../host_kernels/rank_kernel.cc"
"../host_kernels/reduce_prod_kernel.cc"
"../host_kernels/reshape_kernel.cc"
"../host_kernels/rsqrt_kernel.cc"
"../host_kernels/shape_kernel.cc"
"../host_kernels/shape_n_kernel.cc"
"../host_kernels/size_kernel.cc"
"../host_kernels/slice_d_kernel.cc"
"../host_kernels/slice_kernel.cc"
"../host_kernels/squeeze_kernel.cc"
"../host_kernels/unsqueeze_kernel.cc"
"../host_kernels/ssd_prior_box_kernel.cc"
"../host_kernels/strided_slice_kernel.cc"
"../host_kernels/sub_kernel.cc"
"../host_kernels/transdata_kernel.cc"
"../host_kernels/unpack_kernel.cc"
"../graph/passes/pass_utils.cc"
"../graph/common/bcast.cc"
"../common/fp16_t.cc"
"../common/formats/format_transfers/format_transfer_transpose.cc"
"../common/formats/utils/formats_trans_utils.cc"
"../hybrid/hybrid_davinci_model_stub.cc"
)

######## libge_executor.a ########
@@ -187,9 +105,9 @@ target_include_directories(ge_executor PRIVATE
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
${GE_CODE_DIR}/../inc/cce
${GE_CODE_DIR}/../inc/cce
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(ge_executor PRIVATE
@@ -229,9 +147,9 @@ target_include_directories(ge_executor_shared PRIVATE
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
${GE_CODE_DIR}/../inc/cce
${GE_CODE_DIR}/../inc/cce
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(ge_executor_shared PRIVATE
@@ -240,7 +158,7 @@ target_link_libraries(ge_executor_shared PRIVATE
-Wl,--no-as-needed
ge_common
runtime
slog
slog
mmpa
graph
register


+4 -69 ge/executor/ge_executor.cc

@@ -39,8 +39,6 @@
#include "graph/manager/graph_var_manager.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "graph/opsproto_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h"

using std::string;
using std::vector;
@@ -223,33 +221,6 @@ class ModelListenerAdapter : public ModelListener {
std::shared_ptr<ge::ModelListener> listener;
};

static void InitOpsProtoManger() {
string opsproto_path;
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
string file_path = RealPath(path.c_str());
if (file_path.empty()) {
GELOGE(FAILED, "File path %s is invalid.", path.c_str());
return;
}
opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
GELOGI("Get opsproto so path from env : %s", path.c_str());
} else {
string path_base = PluginManager::GetPath();
GELOGI("path_base is %s", path_base.c_str());
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

GELOGI("Get opsproto path is %s", opsproto_path.c_str());
OpsProtoManager *manager = OpsProtoManager::Instance();
map<string, string> option_tmp;
option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
(void)manager->Initialize(option_tmp);
}

GeExecutor::GeExecutor() {}

Status GeExecutor::Initialize() {
@@ -259,16 +230,6 @@ Status GeExecutor::Initialize() {
return ge::SUCCESS;
}

OpTilingManager::GetInstance().LoadSo();

Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
if (initHostCpuEngineStatus != SUCCESS) {
GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
return initHostCpuEngineStatus;
}

InitOpsProtoManger();

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
mem_type.push_back(RT_MEMORY_P2P_DDR);
auto ret = MemManager::Instance().Initialize(mem_type);
@@ -638,16 +599,10 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
return ACL_ERROR_GE_INTERNAL_ERROR;
}

std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id);
if (hybrid_davinci_model != nullptr) {
uint64_t session_id = hybrid_davinci_model->GetSessionId();
std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
if (davinci_model != nullptr) {
uint64_t session_id = davinci_model->GetSessionId();
VarManagerPool::Instance().RemoveVarManager(session_id);
} else {
std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
if (davinci_model != nullptr) {
uint64_t session_id = davinci_model->GetSessionId();
VarManagerPool::Instance().RemoveVarManager(session_id);
}
}
ret = GraphLoader::UnloadModel(model_id);
if (ret != SUCCESS) {
@@ -977,26 +932,6 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
*/
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
ge::RunModelData &run_output_data, bool async_mode) {
std::vector<GeTensorDesc> input_desc = {};
std::vector<GeTensorDesc> output_desc = {};
return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode);
}

/**
* @ingroup ge
* @brief Synchronous execution of offline model(Do not create thread)
* @param [in] uint32_t model_id: Model ID to execute
void* stream: stream to execute
const domi::InputData *input_data: Model input data
const std::vector<GeTensorDesc> &input_desc: Description of model input data
bool async_mode: is asynchronize mode
* @param [out] domi::OutputData *output_data: Model output data
* @param [out] std::vector<GeTensorDesc> &output_desc: Description of model output data
* @return SUCCESS handle successfully / others handle failed
*/
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
std::vector<GeTensorDesc> &output_desc, bool async_mode) {
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -1021,7 +956,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
}
}

return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data);
}

/**


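With the revert applied, callers are back to the single five-argument ExecModel shown in the hunk above. A hedged sketch of a synchronous call; the RunModelData/DataBuffer member names (blobs, data, length) are assumptions taken from the public GE headers, not from this diff:

#include <cstdint>
// Assumes: #include "framework/executor/ge_executor.h", a model already
// loaded under model_id, and device buffers owned by the caller.

ge::Status RunOnce(ge::GeExecutor &executor, uint32_t model_id, void *stream,
                   void *in_dev, uint64_t in_len, void *out_dev, uint64_t out_len) {
  ge::RunModelData input;
  ge::DataBuffer in_buf;
  in_buf.data = in_dev;    // member names assumed from ge_api_types.h
  in_buf.length = in_len;
  input.blobs.push_back(in_buf);

  ge::RunModelData output;
  ge::DataBuffer out_buf;
  out_buf.data = out_dev;
  out_buf.length = out_len;
  output.blobs.push_back(out_buf);

  // async_mode = false: execute synchronously on the supplied stream.
  return executor.ExecModel(model_id, stream, input, output, false);
}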
+1 -83 ge/executor/module.mk

@@ -61,91 +61,9 @@ local_ge_executor_src_files := \
../single_op/task/tbe_task_builder.cc \
../single_op/task/aicpu_task_builder.cc \
../single_op/task/aicpu_kernel_task_builder.cc \
../hybrid/hybrid_davinci_model_stub.cc\
../hybrid/node_executor/aicpu/aicpu_ext_info.cc \
../graph/common/local_context.cc \
../hybrid/common/tensor_value.cc \
../hybrid/common/npu_memory_allocator.cc \
../hybrid/executor/rt_callback_manager.cc \
../hybrid/executor/node_state.cc \
../hybrid/executor/node_done_manager.cc \
../hybrid/executor/hybrid_profiler.cc \
../hybrid/executor/hybrid_model_executor.cc \
../hybrid/executor/hybrid_model_async_executor.cc \
../hybrid/executor/hybrid_execution_context.cc \
../hybrid/executor/subgraph_context.cc \
../hybrid/executor/subgraph_executor.cc \
../hybrid/executor/worker/task_compile_engine.cc \
../hybrid/executor/worker/shape_inference_engine.cc \
../hybrid/executor/worker/execution_engine.cc \
../hybrid/model/hybrid_model.cc \
../hybrid/model/hybrid_model_builder.cc \
../hybrid/model/node_item.cc \
../hybrid/model/graph_item.cc \
../hybrid/node_executor/aicore/aicore_node_executor.cc \
../hybrid/node_executor/aicore/aicore_op_task.cc \
../hybrid/node_executor/aicore/aicore_task_builder.cc \
../hybrid/node_executor/aicpu/aicpu_node_executor.cc \
../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \
../hybrid/node_executor/ge_local/ge_local_node_executor.cc \
../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \
../hybrid/node_executor/host_cpu/kernel_factory.cc \
../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \
../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \
../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \
../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \
../hybrid/node_executor/controlop/control_op_executor.cc \
../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
../hybrid/node_executor/rts/rts_node_executor.cc \
../hybrid/node_executor/node_executor.cc \
../hybrid/node_executor/task_context.cc \
../hybrid/hybrid_davinci_model.cc \
../ge_local_engine/engine/host_cpu_engine.cc \
../graph/common/omg_util.cc \
../graph/manager/host_mem_manager.cc \
../graph/build/memory/var_mem_assign_util.cc \
../host_kernels/transpose_kernel.cc \
../host_kernels/add_kernel.cc \
../host_kernels/broadcast_args_kernel.cc \
../host_kernels/broadcast_gradient_args_kernel.cc \
../host_kernels/cast_kernel.cc \
../host_kernels/concat_offset_kernel.cc \
../host_kernels/concat_v2_kernel.cc \
../host_kernels/dynamic_stitch_kernel.cc \
../host_kernels/identity_kernel.cc \
../host_kernels/empty_kernel.cc \
../host_kernels/expanddims_kernel.cc \
../host_kernels/fill_kernel.cc \
../host_kernels/floordiv_kernel.cc \
../host_kernels/floormod_kernel.cc \
../host_kernels/gather_v2_kernel.cc \
../host_kernels/greater_kernel.cc \
../host_kernels/kernel_utils.cc \
../host_kernels/maximum_kernel.cc \
../host_kernels/mul_kernel.cc \
../host_kernels/pack_kernel.cc \
../host_kernels/permute_kernel.cc \
../host_kernels/range_kernel.cc \
../host_kernels/rank_kernel.cc \
../host_kernels/reduce_prod_kernel.cc \
../host_kernels/reshape_kernel.cc \
../host_kernels/rsqrt_kernel.cc \
../host_kernels/shape_kernel.cc \
../host_kernels/shape_n_kernel.cc \
../host_kernels/size_kernel.cc \
../host_kernels/slice_d_kernel.cc \
../host_kernels/slice_kernel.cc \
../host_kernels/squeeze_kernel.cc \
../host_kernels/unsqueeze_kernel.cc \
../host_kernels/ssd_prior_box_kernel.cc \
../host_kernels/strided_slice_kernel.cc \
../host_kernels/sub_kernel.cc \
../host_kernels/transdata_kernel.cc \
../host_kernels/unpack_kernel.cc \
../graph/passes/pass_utils.cc \
../graph/common/bcast.cc \
../common/fp16_t.cc \
../common/formats/format_transfers/format_transfer_transpose.cc \
../common/formats/utils/formats_trans_utils.cc \

local_ge_executor_c_include := \
proto/insert_op.proto \


+1 -1 ge/ge_local_engine/CMakeLists.txt

@@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
)

############ libge_local_opskernel_builder.a ############
add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_local_opskernel_builder_static PRIVATE
-Werror


+1 -1 ge/ge_local_engine/engine/host_cpu_engine.h

@@ -20,7 +20,7 @@
#include "framework/common/ge_inner_error_codes.h"
#include "graph/node.h"
#include "graph/operator.h"
#include "external/../register/register.h"
#include "register/register.h"

namespace ge {
class HostCpuEngine {


+0 -51 ge/graph/build/graph_builder.cc

@@ -30,7 +30,6 @@
#include "model/ge_model.h"
#include "graph/ge_context.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "graph/utils/op_desc_utils.h"

using domi::BuildMode;

@@ -312,53 +311,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
}

static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
GE_CHECK_NOTNULL(out_anchor);
NodePtr in_node = out_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(in_node);
OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
.AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
.Build();
(void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
for (auto &node : graph->GetDirectNode()) {
// CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
auto op_type = op_desc->GetType();
if (op_type == NETOUTPUT) {
for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
NodePtr in_node = peer_out_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(in_node);

std::string in_node_op_type = in_node->GetType();
if (in_node_op_type == CONSTANT) {
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str());
return FAILED;
}
}
}
}
}
return SUCCESS;
}

Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
@@ -380,9 +332,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
continue;
}

GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");

if (sub_graph->GetGraphUnknownFlag()) {
// unknown shape build flow
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),


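The deleted GenerateTaskForConstant/InsertMemcpyNode pair exists because Const nodes generate no runtime task, so a Const wired straight into NetOutput needs a copy node spliced onto that edge. A toy version of the scan-and-splice logic on a simplified node structure (the ge graph API is deliberately not used; every type here is illustrative):

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Toy graph: each node has a type and input edges. We splice a "Memcpy" node
// into every Const -> NetOutput edge, mirroring the deleted pass.
struct Node {
  std::string name, type;
  std::vector<std::shared_ptr<Node>> inputs;
};

void SpliceCopyBeforeNetOutput(std::vector<std::shared_ptr<Node>> &graph) {
  std::vector<std::shared_ptr<Node>> added;  // collect, then append, so the
  for (auto &node : graph) {                 // loop never invalidates itself
    if (node->type != "NetOutput") continue;
    for (size_t i = 0; i < node->inputs.size(); ++i) {
      auto &src = node->inputs[i];
      if (!src || src->type != "Const") continue;
      auto copy = std::make_shared<Node>();
      copy->name = node->name + "_input_" + std::to_string(i) + "_Memcpy";
      copy->type = "Memcpy";
      copy->inputs.push_back(src);
      src = copy;  // NetOutput now reads through the copy node
      added.push_back(copy);
    }
  }
  graph.insert(graph.end(), added.begin(), added.end());
}

int main() {
  auto c = std::make_shared<Node>();  c->name = "w";          c->type = "Const";
  auto o = std::make_shared<Node>();  o->name = "net_output"; o->type = "NetOutput";
  o->inputs.push_back(c);
  std::vector<std::shared_ptr<Node>> graph{c, o};
  SpliceCopyBeforeNetOutput(graph);
  std::cout << o->inputs[0]->name << "\n";  // net_output_input_0_Memcpy
}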
+2 -5 ge/graph/load/graph_loader.cc

@@ -274,16 +274,13 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
/// @param [in] stream stream to execute model on
/// @param [in] async_mode is asynchronize mode.
/// @param [in] input_data model input data
/// @param [in] input_desc description of model input data
/// @param [out] output_data model output data
/// @param [out] output_desc description of model output data
///
Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
std::vector<GeTensorDesc> &output_desc) {
OutputData &output_data) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data);
if (ret != SUCCESS) {
GELOGE(ret, "Execute model failed, model_id:%u.", model_id);
return ret;


+1 -2 ge/graph/load/graph_loader.h

@@ -65,8 +65,7 @@ class GraphLoader {
const std::vector<uint32_t> &output_queue_ids);

static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
std::vector<GeTensorDesc> &output_desc);
OutputData &output_data);

static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);



+34 -54 ge/graph/load/new_model_manager/davinci_model.cc

@@ -118,8 +118,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
load_end_time_(0),
time_info_(),
dataInputTid(0),
is_weight_mem_has_inited_(false),
is_feature_map_mem_has_inited_(false),
is_model_has_inited_(false),
model_id_(0),
runtime_model_id_(0),
version_(0),
@@ -265,65 +264,34 @@ void DavinciModel::Shrink() {
ge_model_.reset(); // delete object.
}

Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
if (is_weight_mem_has_inited_) {
GELOGE(FAILED, "call InitWeightMem more than once.");
Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
if (is_model_has_inited_) {
GELOGE(FAILED, "call InitModelMem more than once .");
return FAILED;
}
is_weight_mem_has_inited_ = true;
is_model_has_inited_ = true;

std::size_t data_size = TotalMemSize();
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
const Buffer &weights = ge_model_->GetWeight();
std::size_t weights_size = weights.GetSize();
GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE);

if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
return FAILED;
}

weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
is_inner_weight_base_ = false;

if (weights_size != 0) {
weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
is_inner_weight_base_ = false;
if (weight_ptr == nullptr) {
weights_mem_base_ = MallocWeightsMem(weights_size);
if (weights_mem_base_ == nullptr) {
GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
}
is_inner_weight_base_ = true;
}
GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
weights_mem_base_, weights_size);
GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
GELOGI("copy weights data to device");
}

runtime_param_.weight_base = weights_mem_base_;
return SUCCESS;
}


Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (is_feature_map_mem_has_inited_) {
GELOGE(FAILED, "call InitFeatureMapMem more than once .");
if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return FAILED;
}
is_feature_map_mem_has_inited_ = true;

std::size_t data_size = TotalMemSize();
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;

if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
return FAILED;
}

mem_base_ = static_cast<uint8_t *>(dev_ptr);
p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr);
weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
is_inner_mem_base_ = false;
is_inner_weight_base_ = false;

if (TotalMemSize() && mem_base_ == nullptr) {
mem_base_ = MallocFeatureMapMem(data_size);
@@ -331,14 +299,12 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
}
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
mem_base_, data_size);
weights_mem_base_ = mem_base_;

if (!is_inner_weight_base_) {
weights_mem_base_ = mem_base_;
is_inner_weight_base_ = true;
}
is_inner_mem_base_ = true;
is_inner_weight_base_ = true;
}

if (p2p_data_size != 0) {
@@ -347,11 +313,27 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return GE_EXEC_ALLOC_P2P_MEM_FAILED;
}
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
p2p_mem_base_, p2p_data_size);
is_inner_p2p_mem_base_ = true;
}

if (weights_size != 0) {
weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
is_inner_weight_base_ = false;
if (weight_ptr == nullptr) {
weights_mem_base_ = MallocWeightsMem(weights_size);
if (weights_mem_base_ == nullptr) {
GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
}
is_inner_weight_base_ = true;
}
GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
weights_mem_base_, weights_size);
GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
}

GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
runtime_param_.mem_base = mem_base_;
runtime_param_.weight_base = weights_mem_base_;
@@ -661,9 +643,8 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size

GE_TIMESTAMP_START(InitModelMem);
GELOGD("Known node is %d", known_node_);
GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size));
if (!known_node_) {
GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size));
data_inputer_ = new (std::nothrow) DataInputer();
GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
}
@@ -1160,7 +1141,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;);
}

return SUCCESS;
}



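The restored InitModelMem applies one idiom to both feature-map and weight memory: take the caller-supplied pointer if one was given (after checking its size), otherwise allocate internally, and record ownership in an is_inner_* flag so teardown frees only what the model itself allocated. A condensed sketch of that idiom (the helper name is hypothetical, not the DavinciModel API):

#include <cstdio>
#include <cstdlib>

// Hypothetical condensation of the select-or-allocate idiom in InitModelMem:
// prefer the user buffer, otherwise allocate and mark it internally owned.
static unsigned char *SelectOrAlloc(void *user_ptr, size_t user_size,
                                    size_t needed, bool &is_inner) {
  if (user_ptr != nullptr) {
    if (user_size < needed) {
      std::fprintf(stderr, "buffer too small: %zu < %zu\n", user_size, needed);
      return nullptr;  // mirrors the "Invalid mem param" failure path
    }
    is_inner = false;  // caller owns it; never free it on teardown
    return static_cast<unsigned char *>(user_ptr);
  }
  is_inner = true;     // model owns it; free it in the destructor/Shrink
  return static_cast<unsigned char *>(std::malloc(needed));
}

int main() {
  bool inner = false;
  unsigned char *mem = SelectOrAlloc(nullptr, 0, 1024, inner);
  std::printf("allocated internally: %s\n", inner ? "yes" : "no");
  if (inner) std::free(mem);
}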
+2 -5 ge/graph/load/new_model_manager/davinci_model.h

@@ -584,8 +584,7 @@ class DavinciModel {

Status SyncVarData();

Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);
Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize);

void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);

@@ -851,9 +850,7 @@ class DavinciModel {
Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);
Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);

bool is_weight_mem_has_inited_;
bool is_feature_map_mem_has_inited_;

bool is_model_has_inited_;
uint32_t model_id_;
uint32_t runtime_model_id_;
string name_;


+1 -47 ge/graph/load/new_model_manager/model_manager.cc

@@ -31,7 +31,6 @@
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"

namespace ge {
thread_local uint32_t device_count = 0;
@@ -205,13 +204,6 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {

ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
if (hybrid_davinci_model != hybrid_model_map_.end()) {
uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
DestroyAicpuSession(session_id);
return SUCCESS;
}

auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
@@ -933,12 +925,6 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats,
bool new_model_desc) {
std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
if (hybrid_davinci_model != nullptr) {
hybrid_davinci_model->SetModelDescVersion(new_model_desc);
return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats);
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
@@ -957,11 +943,6 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
///
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type) {
std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
if (hybrid_davinci_model != nullptr) {
return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type);
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetDynamicBatchInfo failed, Invalid model id %u!", model_id);
@@ -994,12 +975,6 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect
///
Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
std::vector<std::string> &user_input_shape_order) {
auto hybrid_davinci_model = GetHybridModel(model_id);
if (hybrid_davinci_model != nullptr) {
hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order);
return SUCCESS;
}

auto davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id)
@@ -1015,12 +990,6 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &
}

Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
if (hybrid_davinci_model != nullptr) {
hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info);
return SUCCESS;
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetModelAttr(dynamic_output_shape_info);
@@ -1232,25 +1201,10 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
/// @param [in] stream model stream
/// @param [in] async_mode is asynchronize mode.
/// @param [in] input_data input data
/// @param [in] input_desc description of input data
/// @param [out] output_data output data
/// @param [out] output_desc description of output data
///
Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
std::vector<GeTensorDesc> &output_desc) {
std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
if (hybrid_davinci_model != nullptr) {
auto inputs = input_data.blobs;
auto outputs = output_data.blobs;

Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream);
if (status == SUCCESS) {
GELOGI("Execute model %u success.", model_id);
}
return status;
}

OutputData &output_data) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id);



+1 -4 ge/graph/load/new_model_manager/model_manager.h

@@ -148,13 +148,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
/// @param [in] stream model stream
/// @param [in] async_mode is asynchronize mode.
/// @param [in] input_data model input data
/// @param [in] input_desc description of model input data
/// @param [out] output_data model output data
/// @param [out] output_desc description of model output data
///
ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
std::vector<GeTensorDesc> &output_desc);
OutputData &output_data);

ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs);



+1 -46 ge/graph/partition/dynamic_shape_partition.cc

@@ -26,7 +26,6 @@
#include <vector>
#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/types.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/graph_utils.h"
@@ -73,7 +72,7 @@ Status DynamicShapePartitioner::Partition() {
}
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true),
"Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed do ctrl edge transfer!");
DumpGraph("_Before_DSP");
auto status = PartitionImpl();
GELOGD("%s.", DebugString().c_str());
@@ -87,50 +86,6 @@ Status DynamicShapePartitioner::Partition() {
return status;
}

Status DynamicShapePartitioner::CtrlEdgeTransfer() {
GELOGD("Do ctrl edge transfer start!");
GE_CHECK_NOTNULL(root_graph_);

bool is_dynamic_shape = false;
(void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape);
if (!is_dynamic_shape) {
return SUCCESS;
}
for (auto &subgraph : root_graph_->GetAllSubgraphs()) {
for (ge::NodePtr &n : subgraph->GetDirectNode()) {
auto op_desc = n->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
auto op_type = op_desc->GetType();
if (op_type == CONSTANT || op_type == CONSTANTOP) {
if (n->GetInAllNodes().empty()) {
GELOGD("[CtrlEdgeTransferPass] node [%s] in nodes is empty", n->GetName().c_str());
continue;
}

GELOGD("start to tranfer ctrl edge for const node [%s]", n->GetName().c_str());

for (auto &in_control_node : n->GetInControlNodes()) {
GE_CHECK_NOTNULL(in_control_node);
GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(),
n->GetInControlAnchor()), "remove edge failed");
for (auto &out_node : n->GetOutNodes()) {
if (out_node == nullptr) {
continue;
}
GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(),
out_node->GetInControlAnchor()), "add edge failed.");
}
}
}
}
}

GELOGD("Do ctrl edge transfer end!");
return SUCCESS;
}

Status DynamicShapePartitioner::PartitionImpl() {
REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed.");
REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes.");


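The deleted CtrlEdgeTransfer detaches each incoming control edge of a Const node and re-attaches it to the Const's consumers, so the ordering constraint survives even though the Const itself emits no task. A toy version of the rewiring on a plain control-edge set (not the ge GraphUtils API):

#include <iostream>
#include <set>
#include <string>
#include <utility>

// Control edges as (src, dst) name pairs. For a Const node, move every
// incoming control edge so it points at the Const's consumers instead.
using Edges = std::set<std::pair<std::string, std::string>>;

void TransferCtrlEdges(Edges &ctrl, const std::string &const_node,
                       const std::set<std::string> &consumers) {
  Edges updated;
  for (const auto &e : ctrl) {
    if (e.second == const_node) {
      for (const auto &c : consumers) updated.insert({e.first, c});  // re-attach
    } else {
      updated.insert(e);  // unrelated edge, keep as-is
    }
  }
  ctrl = std::move(updated);
}

int main() {
  Edges ctrl{{"barrier", "const_w"}};
  TransferCtrlEdges(ctrl, "const_w", {"matmul"});
  for (const auto &e : ctrl) std::cout << e.first << " -> " << e.second << "\n";
}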
+0 -1 ge/graph/partition/dynamic_shape_partition.h

@@ -151,7 +151,6 @@ class DynamicShapePartitioner {
Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow);
Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow);
bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor);
Status CtrlEdgeTransfer();
ge::ComputeGraphPtr root_graph_; // The original graph to partition
std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_; // Record nodes and the cluster it belongs to
// topological sorted clusters, this field will change with the splitting.


+1 -1 ge/graph/passes/transop_breadth_fusion_pass.cc

@@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No
GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return "");
if (node->GetType() == CAST) {
trans_data_type = true;
} else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) {
} else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) {
trans_format = true;
trans_shape = true;
} else if (node->GetType() == TRANSDATA) {


+3 -3 ge/host_cpu_engine/CMakeLists.txt

@@ -8,7 +8,7 @@ set(SRC_LIST
"engine/host_cpu_engine.cc"
"ops_kernel_store/host_cpu_ops_kernel_info.cc"
"ops_kernel_store/op/op_factory.cc"
"ops_kernel_store/op/host_op.cc"
"ops_kernel_store/op/host_op.cc"
)

set(CPU_OPS_KERNEL_LIST
@@ -98,7 +98,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE

set_target_properties(atc_host_cpu_engine PROPERTIES
OUTPUT_NAME host_cpu_engine
LIBRARY_OUTPUT_DIRECTORY atclib
LIBRARY_OUTPUT_DIRECTORY atclib
)

############ libhost_cpu_opskernel_builder.so ############
@@ -185,7 +185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES
)

############ libhost_cpu_opskernel_builder.a ############
add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})
add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST})

target_compile_options(host_cpu_opskernel_builder_static PRIVATE
-Werror


+0 -38 ge/hybrid/executor/hybrid_model_async_executor.cc

@@ -353,44 +353,6 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
return SUCCESS;
}

Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
const std::vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &outputs,
std::vector<GeTensorDesc> &output_desc) {
GELOGI("Start to execute model.");

HybridModelExecutor::ExecuteArgs args;
args.inputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
TensorValue tensor_value(inputs[i].data, inputs[i].length);
args.inputs[i] = tensor_value;
}
GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
for (const auto &output_tensor_desc : args.output_desc) {
output_desc.emplace_back(*output_tensor_desc);
}

for (size_t i = 0; i < args.outputs.size(); ++i) {
int64_t output_real_size = 0;
ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size);
if (graph_status != GRAPH_SUCCESS) {
GELOGE(FAILED, "Get tensor size in bytes failed.");
return FAILED;
}
if (output_real_size > 0) {
if (outputs[i].length < static_cast<uint64_t>(output_real_size)) {
GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]",
i, outputs[i].length, output_real_size);
return FAILED;
}
GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE));
}
outputs[i].length = output_real_size;
}

return SUCCESS;
}

Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
GELOGD("Start to execute model.");
// prepare inputs


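The removed Execute overload validates each user output buffer against the tensor's real byte size before copying, then writes the real size back into the DataBuffer so the caller learns the actual output length. The same check-then-copy shape, with rtMemcpy stood in by memcpy and plain host memory for illustration:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

struct Buffer { void *data; uint64_t length; };  // stand-in for ge::DataBuffer

// Copy each produced output into the caller's buffer only if it fits,
// mirroring the size check the removed HybridModelAsyncExecutor::Execute did.
// Assumes one user buffer per produced output.
bool CopyOutputs(const std::vector<std::vector<uint8_t>> &produced,
                 std::vector<Buffer> &user_outputs) {
  for (size_t i = 0; i < produced.size(); ++i) {
    const uint64_t real_size = produced[i].size();
    if (user_outputs[i].length < real_size) {
      std::fprintf(stderr, "output %zu: user buffer %llu < real size %llu\n", i,
                   (unsigned long long)user_outputs[i].length,
                   (unsigned long long)real_size);
      return false;
    }
    if (real_size > 0) {
      std::memcpy(user_outputs[i].data, produced[i].data(), real_size);
    }
    user_outputs[i].length = real_size;  // report actual size back to caller
  }
  return true;
}

int main() {
  std::vector<std::vector<uint8_t>> produced{{1, 2, 3, 4}};
  std::vector<uint8_t> user(8);
  std::vector<Buffer> outs{{user.data(), user.size()}};
  std::printf("ok=%d, real size=%llu\n", CopyOutputs(produced, outs),
              (unsigned long long)outs[0].length);
}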
+0 -5 ge/hybrid/executor/hybrid_model_async_executor.h

@@ -35,11 +35,6 @@ class HybridModelAsyncExecutor {

Status Init();

Status Execute(const std::vector<DataBuffer> &inputs,
const std::vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &outputs,
std::vector<GeTensorDesc> &output_desc);

Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);

Status Start(const std::shared_ptr<ModelListener> &listener);


+0 -79 ge/hybrid/hybrid_davinci_model.cc

@@ -38,14 +38,6 @@ class HybridDavinciModel::Impl {
return SUCCESS;
}

Status Execute(const std::vector<DataBuffer> &inputs,
const std::vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &outputs,
std::vector<GeTensorDesc> &output_desc,
rtStream_t stream) {
return executor_.Execute(inputs, input_desc, outputs, output_desc);
}

Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
return executor_.Execute(inputs, outputs);
}
@@ -76,33 +68,6 @@ class HybridDavinciModel::Impl {
executor_.SetDeviceId(device_id);
}

uint64_t GetSessionId() {
return model_.GetSessionId();
}

Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
return model_.GetDynamicBatchInfo(batch_info, dynamic_type);
}

void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
model_.GetUserDesignateShapeOrder(user_input_shape_order);
}

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
model_.GetModelAttr(dynamic_output_shape_info);
}

Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
return model_.GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats);
}

void SetModelDescVersion(bool is_new_model_desc) {
model_.SetModelDescVersion(is_new_model_desc);
}

private:
std::shared_ptr<ModelListener> listener_;
HybridModel model_;
@@ -130,14 +95,6 @@ Status HybridDavinciModel::Init() {
return impl_->Init();
}

Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs,
const std::vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &outputs,
std::vector<GeTensorDesc> &output_desc, rtStream_t stream) {
GE_CHECK_NOTNULL(impl_);
return impl_->Execute(inputs, input_desc, outputs, output_desc, stream);
}

Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
GE_CHECK_NOTNULL(impl_);
return impl_->Execute(inputs, outputs);
@@ -175,41 +132,5 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
impl_->SetDeviceId(device_id);
}
}

Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
GE_CHECK_NOTNULL(impl_);
return impl_->GetDynamicBatchInfo(batch_info, dynamic_type);
}

void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
if (impl_ != nullptr) {
impl_->GetUserDesignateShapeOrder(user_input_shape_order);
}
}

void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
if (impl_ != nullptr) {
impl_->GetModelAttr(dynamic_output_shape_info);
}
}

Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
GE_CHECK_NOTNULL(impl_);
return impl_->GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats);
}

void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
if (impl_ != nullptr) {
impl_->SetModelDescVersion(is_new_model_desc);
}
}

uint64_t HybridDavinciModel::GetSessionId() {
GE_CHECK_NOTNULL(impl_);
return impl_->GetSessionId();
}
} // namespace hybrid
} // namespace ge

+0 -21 ge/hybrid/hybrid_davinci_model.h

@@ -37,12 +37,6 @@ class HybridDavinciModel {

Status Init();

Status Execute(const std::vector<DataBuffer> &inputs,
const std::vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &outputs,
std::vector<GeTensorDesc> &output_desc,
rtStream_t stream);

Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);

Status ModelRunStart();
@@ -57,21 +51,6 @@ class HybridDavinciModel {

void SetDeviceId(uint32_t device_id);

uint64_t GetSessionId();

Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);

void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order);

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);

Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);

void SetModelDescVersion(bool is_new_model_desc);

private:
HybridDavinciModel() = default;
class Impl;


+0 -32 ge/hybrid/hybrid_davinci_model_stub.cc

@@ -28,14 +28,6 @@ Status HybridDavinciModel::Init() {
return UNSUPPORTED;
}

Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs,
const std::vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &outputs,
std::vector<GeTensorDesc> &output_desc,
rtStream_t stream) {
return UNSUPPORTED;
}

Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
return UNSUPPORTED;
}
@@ -60,29 +52,5 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) {

void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
}

uint64_t HybridDavinciModel::GetSessionId() {
return 0;
}

Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
return UNSUPPORTED;
}

void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
}

void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
}

Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
return UNSUPPORTED;
}

void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
}
} // namespace hybrid
} // namespace ge

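hybrid_davinci_model.cc and hybrid_davinci_model_stub.cc appear to be two implementations of the same header: a build links either the full hybrid model or the stub, whose methods return UNSUPPORTED or do nothing. A minimal sketch of that link-time switch, with all names illustrative:

#include <cstdio>

// One declaration, two translation units; the build links exactly one of them.
enum Status { SUCCESS = 0, UNSUPPORTED = 1 };

struct FeatureModel {
  Status Init();  // defined in feature.cc OR feature_stub.cc, never both
};

// --- feature_stub.cc: compiled into builds without the feature ---
Status FeatureModel::Init() { return UNSUPPORTED; }

int main() {
  FeatureModel m;
  std::printf("Init -> %s\n", m.Init() == UNSUPPORTED ? "UNSUPPORTED" : "SUCCESS");
}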
+1 -214 ge/hybrid/model/hybrid_model.cc

@@ -21,18 +21,12 @@
#include "graph/utils/graph_utils.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/node_executor/node_executor.h"
#include "common/op/ge_op_utils.h"

namespace ge {
namespace hybrid {
namespace {
const int64_t kMemSizeUnknownShape = -1; // Unknown shape mem size
}

HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) {
}

@@ -134,214 +128,7 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c
}

const string &HybridModel::GetModelName() const {
return model_name_;
}

Status HybridModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
// dynamic shape do not need dynamic batch
batch_info = {};
dynamic_type = -1;
return SUCCESS;
}

void HybridModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
// dynamic shape do not need dynamic batch
user_input_shape_order = {};
}

void HybridModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
dynamic_output_shape_info = {};
}

Status HybridModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats) {
auto node_item_list = root_graph_item_->GetInputNodes();
if (node_item_list.empty()) {
GELOGE(FAILED, "node item list is empty!");
return FAILED;
}

GE_CHECK_NOTNULL(node_item_list[0]->node);
GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc());
if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) {
GELOGE(FAILED, "input size of op is not 1!");
return FAILED;
}

GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed");

return SUCCESS;
}

void HybridModel::SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims, std::vector<std::pair<int64_t,int64_t>> &shape_ranges,
Format &format, InputOutputDescInfo &input) {
uint32_t n, c, h, w;
n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N;
c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C;
h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H;
w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W;

if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) {
input.shape_info.num = model_input_dims[n];
input.shape_info.height = model_input_dims[h];
input.shape_info.width = model_input_dims[w];
input.shape_info.channel = model_input_dims[c];
}
for (auto model_input_dim : model_input_dims) {
input.shape_info.dims.push_back(model_input_dim);
}
input.shape_info.shape_ranges = shape_ranges;
return;
}

void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) {
std::vector<std::pair<int64_t,int64_t>> shape_ranges;
if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) {
// When static aipp is set, need to get the model input dims which processed by aipp
vector<int64_t> model_input_dims;
(void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims);
SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, format, input);
return;
}
// judge if this data is linked dynamic aipp first, multiply batch has been considered
if (op_desc->HasAttr("_dynamic_aipp_input_dims")) {
vector<int64_t> dynamic_aipp_input_dims;
(void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims);
SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, format, input);
return;
} else {
vector<int64_t> input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims();
op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges);
SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, format, input);
return;
}
}

Status HybridModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
auto node_item_list = root_graph_item_->GetInputNodes();
for (auto &node_item : node_item_list) {
InputOutputDescInfo input;

GE_CHECK_NOTNULL(node_item->node);
auto op_desc = node_item->node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0));

Format format = op_desc->GetInputDescPtr(0)->GetFormat();
input.data_type = op_desc->GetInputDescPtr(0)->GetDataType();
input.name = op_desc->GetName();

int64_t input_size = 0;
GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed.");

// support dynamic shape
if (input_size < 0) {
GELOGD("dynamic shape scene, input size is unknown. "
"format=%d, data_type=%d, input_size=%ld",
format, input.data_type, input_size);
input_size = kMemSizeUnknownShape; // -1
}

// not support dynamic shape input for now, so input_size here will be not less than zero.
input.size = input_size;

CreateInputDimsInfo(op_desc, format, input);

formats.push_back(format);
input_desc.push_back(input);
}
is_new_model_desc_ = false;
return SUCCESS;
}

void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info, uint32_t &format_result) {
GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return );
Format format = output_desc->GetFormat();
GeShape shape = output_desc->GetShape();
std::vector<std::pair<int64_t,int64_t>> shape_ranges;
output_desc->GetShapeRange(shape_ranges);
DataType data_type = output_desc->GetDataType();
int64_t dims[] = {1, 1, 1, 1};
format_result = format;
if (format == FORMAT_ND) { // for ND tensor
for (size_t i = 0; i < shape.GetDimNum() && i < (sizeof(dims) / sizeof(dims[0])); i++) {
dims[i] = shape.GetDim(i);
}
} else { // FOR FORMAT_NHWC or FORMAT_NCHW
dims[0] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N); // 0: first dim
dims[1] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C); // 1: second dim
dims[2] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H); // 2: third dim
dims[3] = shape.GetDim(format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W); // 3: forth dim
}
output_desc_info.shape_info.num = dims[0]; // 0: first dim
output_desc_info.shape_info.channel = dims[1]; // 1: second dim
output_desc_info.shape_info.height = dims[2]; // 2: third dim
output_desc_info.shape_info.width = dims[3]; // 3: forth dim
if (format == FORMAT_FRACTAL_Z) { // FraczToHWCK
int64_t k = shape.GetDim(0); // 0: first dim
int64_t c = shape.GetDim(1); // 1: second dim
int64_t h = shape.GetDim(2); // 2: third dim
int64_t w = shape.GetDim(3); // 3: forth dim
output_desc_info.shape_info.dims.push_back(h);
output_desc_info.shape_info.dims.push_back(w);
output_desc_info.shape_info.dims.push_back(c);
output_desc_info.shape_info.dims.push_back(k);
if (shape_ranges.size() == 4) { // 4 dims
output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[2]); // h:2
output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[3]); // w:3
output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[1]); // c:1
output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[0]); // k:0
}
format_result = FORMAT_HWCN;
} else {
for (size_t j = 0; j < shape.GetDimNum(); j++) {
output_desc_info.shape_info.dims.push_back(shape.GetDim(j));
}
output_desc_info.shape_info.shape_ranges = shape_ranges;
}
int64_t tensor_size = 0;
(void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size);
output_desc_info.size = static_cast<uint64_t>(tensor_size);
output_desc_info.data_type = output_desc->GetDataType();
}

Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) {
std::vector<ConstGeTensorDescPtr> output_desc_list;
GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed"); // output_desc_list contains vaild input desc

vector<std::string> out_node_names;
(void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names);

GE_CHECK_NOTNULL(root_graph_item_->GetOutputNode());
auto op_desc = root_graph_item_->GetOutputNode()->op_desc;
GE_CHECK_NOTNULL(op_desc);

auto out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size());

for (uint32_t index = 0; index < out_size; ++index) {
string output_name;
std::vector<std::string> src_name = op_desc->GetSrcName();
std::vector<int64_t> src_index = op_desc->GetSrcIndex();
if (out_size == out_node_names.size()) {
bool contains_colon = out_node_names[index].find(":") != std::string::npos;
output_name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]);
} else {
output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]);
}

InputOutputDescInfo output_desc_info;
output_desc_info.name = output_name;

uint32_t format_result;
CreateOutput(output_desc_list[index], output_desc_info, format_result);
output_desc.push_back(output_desc_info);
formats.push_back(format_result);
}
return SUCCESS;
return model_name_;
}
} // namespace hybrid
} // namespace ge

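In the removed CreateOutput, a FORMAT_FRACTAL_Z output is reported as FORMAT_HWCN: the stored dims, indexed k=0, c=1, h=2, w=3, are emitted in h, w, c, k order, and the four shape ranges are permuted identically. A concrete check of that permutation (values illustrative):

#include <array>
#include <cstdio>

int main() {
  // Stored FRACTAL_Z dims, indexed k=0, c=1, h=2, w=3 as in the removed code.
  const std::array<long, 4> dims{64, 16, 3, 3};
  // Reported HWCN order: h, w, c, k  ->  indices 2, 3, 1, 0.
  const std::array<int, 4> perm{2, 3, 1, 0};
  std::printf("HWCN dims:");
  for (int idx : perm) std::printf(" %ld", dims[idx]);
  std::printf("\n");  // prints: 3 3 16 64
}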
+0 -26 ge/hybrid/model/hybrid_model.h

@@ -83,30 +83,6 @@ class HybridModel {

const string &GetModelName() const;

Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);

void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order);

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);

Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &outputFormats);

Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats);

void CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output, uint32_t &format_result);

Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats);

void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);

void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; }

void SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims, std::vector<std::pair<int64_t, int64_t>> &shape_ranges,
Format &format, InputOutputDescInfo &input);

private:
friend class HybridModelBuilder;
friend class HybridModelAsyncExecutor;
@@ -125,8 +101,6 @@ class HybridModel {
std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_;
std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;

bool is_new_model_desc_ = false; // support aipp

// runtime fields
uint32_t device_id_ = 0;
uint32_t model_id_ = 0;


+ 0
- 30
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -27,8 +27,6 @@
#include "graph/utils/graph_utils.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/node_executor/node_executor.h"
#include "framework/common/debug/ge_log.h"
#include "graph/utils/attr_utils.h"

namespace ge {
namespace hybrid {
@@ -39,30 +37,6 @@ const uint32_t kAlignment = 32;
const int kBytes = 8;
const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown";

Status SetOutputNameAttr(ComputeGraph &graph) {
vector<string> output_names;
for (const auto &node : graph.GetDirectNode()) {
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
auto op_type = op_desc->GetType();
if (op_type == NETOUTPUT) {
for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
NodePtr in_node = peer_out_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(in_node);
output_names.push_back(in_node->GetName());
}
}
}
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names),
GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed.");
return FAILED);
return SUCCESS;
}

int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) {
int64_t var_size = 0;
auto data_type = desc.GetDataType();
@@ -965,10 +939,6 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr

Status HybridModelBuilder::IndexTaskDefs() {
const auto &root_graph = ge_root_model_->GetRootGraph();
if (SetOutputNameAttr(*root_graph) != SUCCESS) {
GELOGW("Set output name attr failed.");
}

for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
auto &name = it.first;
auto &ge_model = it.second;


+ 0
- 56
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -19,7 +19,6 @@
#include "framework/common/debug/log.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/new_model_manager/tbe_handle_store.h"

using optiling::OpRunInfo;

@@ -37,58 +36,6 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
return SUCCESS;
}

Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
auto op_desc_ptr = make_shared<OpDesc>(op_desc);
GE_CHECK_NOTNULL(op_desc_ptr);
auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str());
return INTERNAL_ERROR;
}
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
if (rt_ret != RT_ERROR_NONE) {
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
GELOGI("Get original type of session_graph_id."));
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else {
GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
return PARAM_INVALID;
}
binary.version = 0;
binary.data = tbe_kernel->GetBinData();
binary.length = tbe_kernel->GetBinDataSize();
GELOGI("TBE: binary.length: %lu", binary.length);
GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle));
std::string meta_data;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data),
GELOGI("Get original type of json_string"));
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
} else {
GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str());
kernel_store.ReferTBEHandle(stub_name_.c_str());
}
std::string kernel_name;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name),
GELOGI("Get original type of kernel_name"));
GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str());
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0));
}
return SUCCESS;
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
@@ -98,9 +45,6 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
stub_name_ = kernel_def.stub_func();

GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc));

GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_));
args_size_ = kernel_def.args_size();
block_dim_ = kernel_def.block_dim();
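
The magic-string dispatch inside the removed RegisterTbeHandle reduces to a small lookup. A sketch under the assumption that the runtime header provides the RT_DEV_BINARY_MAGIC_* constants, as the removed code implies; the helper name is ours:

// Maps the TVM_ATTR_NAME_MAGIC attribute string to the runtime binary magic,
// mirroring the removed branch chain; unknown strings are rejected.
Status GetBinaryMagic(const std::string &json_string, uint32_t &magic) {
  if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
    magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
  } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
    magic = RT_DEV_BINARY_MAGIC_ELF;
  } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
    magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
  } else {
    GELOGE(PARAM_INVALID, "TBE: invalid magic string: %s", json_string.c_str());
    return PARAM_INVALID;
  }
  return SUCCESS;
}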


+ 0
- 1
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -62,7 +62,6 @@ class AiCoreOpTask {
static Status ValidateTaskDef(const domi::TaskDef &task_def);
Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
Status InitTilingInfo(const OpDesc &op_desc);
Status RegisterTbeHandle(const OpDesc &op_desc);

std::string stub_name_;
void *stub_func_ = nullptr;


+ 0
- 16
inc/framework/executor/ge_executor.h View File

@@ -234,22 +234,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data,
ge::RunModelData &output_data, bool async_mode = false);

///
/// @ingroup ge
/// @brief Synchronous execution of an offline model (does not create a thread)
/// @param [in] uint32_t model_id: ID of the model to execute
/// @param [in] void *stream: stream to execute on
/// @param [in] bool async_mode: whether to run in asynchronous mode
/// @param [in] const ge::RunModelData &run_input_data: model input data
/// @param [in] const std::vector<GeTensorDesc> &input_desc: description of the model input data
/// @param [out] ge::RunModelData &run_output_data: model output data
/// @param [out] std::vector<GeTensorDesc> &output_desc: description of the model output data
/// @return SUCCESS on success / other values on failure
///
ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
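
For reference, the overload removed here was the variant carrying tensor descriptions alongside the raw buffers. A hypothetical call, where model_id, stream, and the RunModelData buffers are placeholders a real caller obtains from a prior load step:

// Sketch only: executes a loaded model with explicit input/output descs.
ge::GeExecutor executor;
std::vector<ge::GeTensorDesc> input_desc;   // filled in by the caller
std::vector<ge::GeTensorDesc> output_desc;  // filled in by ExecModel
ge::Status ret = executor.ExecModel(model_id, stream, run_input_data, input_desc,
                                    run_output_data, output_desc, false);
if (ret != ge::SUCCESS) {
  GELOGE(ret, "ExecModel failed.");
}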

///
/// @ingroup ge
/// @brief Get weight memory size from model file

