Browse Source

Update some diff from yellow zone

pull/616/head
taoxiangdong 5 years ago
parent
commit
b1c70681ec
5 changed files with 57 additions and 35 deletions
  1. +29
    -24
      ge/graph/build/model_builder.cc
  2. +3
    -1
      ge/graph/build/model_builder.h
  3. +1
    -1
      ge/graph/load/new_model_manager/davinci_model.cc
  4. +23
    -8
      ge/graph/load/new_model_manager/model_manager.cc
  5. +1
    -1
      ge/graph/load/new_model_manager/model_manager.h

+ 29
- 24
ge/graph/build/model_builder.cc View File

@@ -581,15 +581,11 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add TBE Kernels and custom aicpu op bin // Add TBE Kernels and custom aicpu op bin
std::set<std::string> tbe_name_set; std::set<std::string> tbe_name_set;
std::set<std::string> aicpu_name_set; std::set<std::string> aicpu_name_set;
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
std::set<std::string> aicpu_optype_set;
std::set<std::string> aicpu_tf_optype_set;
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc(); auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
// check aicpu op type // check aicpu op type
(void)CheckAicpuOp(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set);
(void)CollectCheckAicpuAttr(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set);
TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) { if (tbe_kernel == nullptr) {
std::string kernel_name; std::string kernel_name;
@@ -611,24 +607,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
tbe_kernel_store_.AddTBEKernel(tbe_kernel); tbe_kernel_store_.AddTBEKernel(tbe_kernel);
} }


if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) {
GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) {
GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}

// reset list with set
aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.",
compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail.");

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail.");
(void)SetModelAicpuCheckAttr(model);


for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc(); auto node_op_desc = n->GetOpDesc();
@@ -822,7 +801,7 @@ Status ModelBuilder::CompileSingleOp() {
return ge::SUCCESS; return ge::SUCCESS;
} }


Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set) {
Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set) {
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
std::string aicpu_optype; std::string aicpu_optype;
bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
@@ -840,4 +819,30 @@ Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string
return SUCCESS; return SUCCESS;
} }

/// Rewrites the "needCheckCpu" and "needCheckTf" string-list attributes of `model`.
///
/// For each attribute, the list currently stored on `model` (if any) is read, funnelled
/// through a std::set so duplicates collapse and entries come out in sorted order, and the
/// resulting list is stored back on `model` under the same attribute name.
///
/// NOTE(review): both sets are created empty inside this function, so the only entries that
/// can reach the output are the ones already present on `model`. Op types gathered by a
/// caller are not passed in here — confirm whether that is intended.
///
/// @param model  Model whose two attributes are read and then overwritten.
/// @return SUCCESS after both attributes have been written. Each SetListStr call is wrapped
///         in GE_CHK_BOOL_EXEC with `return FAILED` as the action supplied for a failed check.
Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model) {
  std::set<std::string> unique_cpu_types;
  std::set<std::string> unique_tf_types;
  std::vector<std::string> cpu_types;
  std::vector<std::string> tf_types;

  // Pick up whatever aicpu op types the model already carries.
  const bool has_cpu_attr = ge::AttrUtils::GetListStr(&model, "needCheckCpu", cpu_types);
  if (has_cpu_attr) {
    GELOGI("Already have aicpu optype size: %zu", cpu_types.size());
    for (const std::string &op_type : cpu_types) {
      (void)unique_cpu_types.insert(op_type);
    }
  }

  // Same for the TF-flavoured aicpu op types.
  const bool has_tf_attr = ge::AttrUtils::GetListStr(&model, "needCheckTf", tf_types);
  if (has_tf_attr) {
    GELOGI("Already have aicpu tf optype size: %zu", tf_types.size());
    for (const std::string &op_type : tf_types) {
      (void)unique_tf_types.insert(op_type);
    }
  }

  // Rebuild each list from its set: de-duplicated and in the set's sorted order.
  cpu_types = std::vector<std::string>(unique_cpu_types.begin(), unique_cpu_types.end());
  tf_types = std::vector<std::string>(unique_tf_types.begin(), unique_tf_types.end());

  GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.",
         compute_graph_->GetName().c_str(), unique_cpu_types.size(), cpu_types.size(), unique_tf_types.size(),
         tf_types.size());

  // Store the normalized lists back on the model.
  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", cpu_types), return FAILED,
                   "Set attr needCheckCpu fail.");
  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", tf_types), return FAILED,
                   "Set attr needCheckTf fail.");
  return SUCCESS;
}
} // namespace ge } // namespace ge

+ 3
- 1
ge/graph/build/model_builder.h View File

@@ -83,7 +83,9 @@ class ModelBuilder {


Status CompileSingleOp(); Status CompileSingleOp();


Status CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set);
Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set);

Status SetModelCheckAicpuAttr(ge::Model &model);


uint64_t session_id_; uint64_t session_id_;




+ 1
- 1
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -485,7 +485,7 @@ Status DavinciModel::DoTaskSink() {


GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");


GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOp(ge_model_), "Check aicpu op type failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");


GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");




+ 23
- 8
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -54,6 +54,7 @@ const char *const kDeleteCustOp = "deleteCustOp";
const int kTimeSpecNano = 1000000000; const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000; const int kTimeSpecMiro = 1000000;
const int kSessionMaxBias = 100; const int kSessionMaxBias = 100;
const int kOpNameMaxSize = 100;
struct CustAicpuSoBuf { struct CustAicpuSoBuf {
uint64_t kernelSoBuf; uint64_t kernelSoBuf;
uint32_t kernelSoBufLen; uint32_t kernelSoBufLen;
@@ -1540,7 +1541,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
std::vector<ReturnCode> res_ret_code_list; std::vector<ReturnCode> res_ret_code_list;


if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) { if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) {
GELOGI("No need to check aicpu optype.");
GELOGI("No need to check aicpu op type.");
return SUCCESS; return SUCCESS;
} }


@@ -1624,7 +1625,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
op_check_info_req.offSetLen = sizeof(SysOpCheckInfo); op_check_info_req.offSetLen = sizeof(SysOpCheckInfo);
op_check_info_req.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list)); op_check_info_req.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list));


op_check_info_res.opListNum = op_nums;
op_check_info_res.opListNum = 0;
op_check_info_res.returnCodeList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list)); op_check_info_res.returnCodeList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list));
op_check_info_res.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list)); op_check_info_res.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list));


@@ -1636,7 +1637,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
} }
allocated_mem.push_back(args); allocated_mem.push_back(args);
GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));


GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtStreamCreate(&stream, 0));
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));
@@ -1648,8 +1649,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
} }


// Check the response // Check the response
void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen);
op_check_info_res = {};
void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen);
GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST));
std::function<void()> callback = [&]() { std::function<void()> callback = [&]() {
for (auto mem : allocated_mem) { for (auto mem : allocated_mem) {
@@ -1658,25 +1658,40 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
GE_CHK_RT(rtStreamDestroy(stream)); GE_CHK_RT(rtStreamDestroy(stream));
}; };


if (op_check_info_res.isWithoutJson) {
GELOGI("No need to check aicpu in this scenoria.");
GE_MAKE_GUARD(release, callback);
return SUCCESS;
}
uint64_t res_op_nums = op_check_info_res.opListNum; uint64_t res_op_nums = op_check_info_res.opListNum;
GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums);
if (res_op_nums != 0) { if (res_op_nums != 0) {
res_ret_code_list.clear();
res_ret_code_list.resize(res_op_nums);
res_aicpu_op_info_list.clear();
res_aicpu_op_info_list.resize(res_op_nums);
GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
GELOGE(FAILED, "Number of retcode is not equal to number of op type."); GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
GE_MAKE_GUARD(release, callback); GE_MAKE_GUARD(release, callback);
return FAILED;
} }
std::string fail_reason = "Check aicpu op_type failed. details: ";
std::string fail_reason;
for (uint32_t i = 0; i < res_op_nums; i++) { for (uint32_t i = 0; i < res_op_nums; i++) {
ReturnCode ret_code = res_ret_code_list.at(i); ReturnCode ret_code = res_ret_code_list.at(i);
SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
GELOGI("Not surpport aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
std::vector<char> op_name; std::vector<char> op_name;
op_name.clear();
op_name.resize(kOpNameMaxSize);
GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
string op_name_str(op_name.data()); string op_name_str(op_name.data());
fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " " + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> not support.";
fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " ret code:" + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n";
} }
GELOGE(FAILED, "%s", fail_reason.c_str());
fail_reason += "not support.";
GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
GE_MAKE_GUARD(release, callback); GE_MAKE_GUARD(release, callback);
return FAILED; return FAILED;
} }


+ 1
- 1
ge/graph/load/new_model_manager/model_manager.h View File

@@ -297,7 +297,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {


ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list, std::vector<std::string> &aicpu_tf_optype_list); ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list, std::vector<std::string> &aicpu_tf_optype_list);


ge::Status CheckAicpuOp(GeModelPtr ge_model);
ge::Status CheckAicpuOpList(GeModelPtr ge_model);


ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);




Loading…
Cancel
Save