
support dynamic inputs

tags/v1.2.0
chuxing, 3 years ago
parent commit 061df1c781

2 changed files with 57 additions and 41 deletions:
  1. ge/hybrid/executor/hybrid_model_async_executor.cc (+55 −39)
  2. ge/hybrid/executor/hybrid_model_async_executor.h (+2 −2)
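In short: the executor previously allocated a fixed device TensorValue per input at init time, which cannot work when an input's shape (and hence size) is only known per run. This commit instead records each input's tensor desc, its statically known size, and a dynamic flag at init, then sizes and allocates the input buffers inside PrepareInputs() from the shapes the caller passes along with the data. A minimal caller-side sketch, assuming the InputData::blobs / InputData::shapes and DataBuffer fields seen in this diff; the helper name and the dims are illustrative, not part of the commit:

// Hypothetical helper, not part of the commit: packages one input for a run.
void FeedInput(InputData &current_data, void *host_buffer, uint64_t payload_bytes) {
  DataBuffer buf;
  buf.data = host_buffer;      // raw payload on the host
  buf.length = payload_bytes;  // checked against the model-required size
  current_data.blobs.emplace_back(buf);

  // One shape entry per input index; consulted only for dynamic inputs.
  // GeShape(current_data.shapes[i]) in the diff suggests each entry is a dim vector.
  current_data.shapes.push_back({1, 3, 224, 224});
}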

ge/hybrid/executor/hybrid_model_async_executor.cc (+55 −39)

@@ -94,7 +94,7 @@ Status HybridModelAsyncExecutor::Init() {
   executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
   GE_CHECK_NOTNULL(executor_);
   GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
-  GE_CHK_STATUS_RET(InitInputTensors(), "Failed to init input tensors");
+  GE_CHK_STATUS_RET(InitInputDesc(), "Failed to init input tensors");
   return SUCCESS;
 }

@@ -198,50 +198,66 @@ Status HybridModelAsyncExecutor::SyncVarData() {
 }
 
 Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
-  args.inputs.resize(input_tensors_.size());
+  if (current_data.blobs.size() < input_tensor_desc_.size()) {
+    GELOGE(PARAM_INVALID, "Blob size mismatches, expect at least %zu, but got %zu",
+           input_tensor_desc_.size(), current_data.blobs.size());
+    return PARAM_INVALID;
+  }
+
+  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
+  GE_CHECK_NOTNULL(allocator);
+  args.input_desc.resize(input_tensor_desc_.size());
   const std::vector<DataBuffer> &blobs = current_data.blobs;
-  for (const auto &it : input_tensors_) {
-    auto input_index = it.first;
-    auto input_tensor = it.second;
-    auto data_size = input_tensor.GetSize();
-    GELOGD("To copy input data for input[%u]", input_index);
-    if (input_index >= blobs.size()) {
-      GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld",
-             blobs.size(), model_->input_nodes_.size(), input_index, data_size);
-      return FAILED;
+  for (size_t input_index = 0; input_index < input_tensor_desc_.size(); ++input_index) {
+    auto tensor_size = input_sizes_[input_index];
+    if (is_input_dynamic_[input_index]) {
+      if (input_index >= current_data.shapes.size()) {
+        GELOGE(PARAM_INVALID, "Shape index out of range, index = %zu, shape size = %zu",
+               input_index, current_data.shapes.size());
+        return PARAM_INVALID;
+      }
+      auto &tensor_desc = input_tensor_desc_[input_index];
+      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
+      args.input_desc[input_index] = tensor_desc;
+      GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
+                              "Failed to calc tensor size, index = %zu, shape = [%s]",
+                              input_index,
+                              tensor_desc->GetShape().ToString().c_str());
+      GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size);
     }
 
+    GE_CHECK_GE(tensor_size, 0);
+    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    GE_CHECK_NOTNULL(tensor_buffer);
+    args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release()));
+
+    GELOGD("To copy input data for input[%u]", input_index);
     const DataBuffer &data_buf = blobs[input_index];
-    auto mem_size = static_cast<uint32_t>(data_size);
+    auto mem_size = static_cast<uint64_t>(tensor_size);
     GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
                            PARAM_INVALID,
-                           "input data size(%lu) does not match model required size(%u), ret failed.",
+                           "input data size(%lu) does not match model required size(%lu), ret failed.",
                            data_buf.length,
                            mem_size);
 
     GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]",
-           model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length);
-    GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(),
+           model_->root_runtime_param_.graph_id,
+           input_index,
+           args.inputs[input_index].GetData(),
+           mem_size,
+           data_buf.length);
+    GE_CHK_RT_RET(rtMemcpy(args.inputs[input_index].MutableData(),
                            mem_size,
                            data_buf.data,
                            data_buf.length,
                            RT_MEMCPY_HOST_TO_DEVICE));
-    args.inputs[input_index] = input_tensor;
+    if (is_input_dynamic_[input_index]) {
+      auto &tensor_desc = input_tensor_desc_[input_index];
+      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
+      args.input_desc[input_index] = tensor_desc;
+      GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+    }
   }
 
   return SUCCESS;
 }
 
-Status HybridModelAsyncExecutor::InitInputTensors() {
-  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
-  GE_CHECK_NOTNULL(allocator);
+Status HybridModelAsyncExecutor::InitInputDesc() {
   int input_index = 0;
   for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
     GELOGD("Init input[%u], node = %s, is_dynamic = %d",
@@ -250,21 +266,21 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
            input_node->is_dynamic);
     auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
     GE_CHECK_NOTNULL(output_desc);
-    int64_t tensor_size = 0;
-    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
-                            "Failed to get size from %s",
-                            input_node->NodeName().c_str());
-    if (tensor_size == 0) {
-      GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
-      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
-                              "Failed to calc tensor size");
-      GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+    int64_t tensor_size = -1;
+    if (!input_node->is_dynamic) {
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
+                              "Failed to get size from %s",
+                              input_node->NodeName().c_str());
+
+      if (tensor_size == 0) {
+        GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
+        GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
+                                "Failed to calc tensor size");
+        GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+      }
     }
-    auto buffer = TensorBuffer::Create(allocator, tensor_size);
-    GE_CHECK_NOTNULL(buffer);
-    TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release()));
-    tensor.SetName("Input_" + input_node->NodeName());
-    input_tensors_.emplace(input_index, tensor);
 
+    input_sizes_.emplace(input_index, tensor_size);
+    input_tensor_desc_.emplace(input_index, output_desc);
+    is_input_dynamic_.push_back(input_node->is_dynamic);
     input_index += 1;
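Condensed, the per-run input path added above reduces to the following sketch (logging and error checks elided; a reading of the hunk, not a drop-in replacement):

for (size_t i = 0; i < input_tensor_desc_.size(); ++i) {
  int64_t tensor_size = input_sizes_[i];              // -1 for dynamic inputs (see InitInputDesc)
  if (is_input_dynamic_[i]) {
    auto &desc = input_tensor_desc_[i];
    desc->SetShape(GeShape(current_data.shapes[i]));  // adopt the caller's runtime shape
    args.input_desc[i] = desc;
    TensorUtils::GetTensorMemorySizeInBytes(*desc, tensor_size);  // size now computable
  }
  // Allocation moves from init time to run time, since a dynamic input's
  // size is only known here; static inputs just reuse their cached size.
  auto buffer = TensorBuffer::Create(allocator, tensor_size);
  args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(buffer.release()));
  // rtMemcpy(...) then copies blobs[i] from host into the fresh device buffer.
}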

ge/hybrid/executor/hybrid_model_async_executor.h (+2 −2)

@@ -53,7 +53,7 @@ class HybridModelAsyncExecutor {
   Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);
 
  private:
-  Status InitInputTensors();
+  Status InitInputDesc();
 
   Status RunInternal();

@@ -85,7 +85,7 @@ class HybridModelAsyncExecutor {
   uint64_t iterator_count_ = 0;
 
   rtStream_t stream_ = nullptr;
-  std::map<uint32_t, TensorValue> input_tensors_;
+  std::map<uint32_t, int64_t> input_sizes_;
   std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
   std::vector<bool> is_input_dynamic_;
   std::shared_ptr<ModelListener> listener_;
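The header change mirrors this: the map of pre-allocated TensorValues gives way to a size map, while the desc map and the dynamic flags carry the rest. A plausible reading of the resulting invariant, inferred from InitInputDesc above (the commit does not state it explicitly):

// Inferred, not stated in the commit: for input index i,
//   input_sizes_[i] >= 0  => static input, byte size fixed at init time
//   input_sizes_[i] == -1 => dynamic input, size recomputed every run from
//                            the shape supplied in InputData::shapes[i]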

