|
|
@@ -54,6 +54,7 @@ const char *const kDeleteCustOp = "deleteCustOp"; |
|
|
// File-scope numeric constants.
// NOTE(review): in this listing most declarations appear twice (it looks like the
// old and new columns of a diff view); the compiled file can only hold one copy
// of each — confirm against the real source before applying anything here.

// 10^9 — presumably the number of nanoseconds per second, used for timespec
// arithmetic; the use site is not visible in this excerpt (TODO confirm).
const int kTimeSpecNano = 1000000000; |
|
|
const int kTimeSpecNano = 1000000000; |
|
|
// 10^6 — presumably a microsecond scale factor; "Miro" looks like a misspelling
// of "Micro". The use site is not visible here, so the name is left unchanged.
const int kTimeSpecMiro = 1000000; |
|
|
const int kTimeSpecMiro = 1000000; |
|
|
// Upper bound of 100 related to sessions; its exact meaning cannot be determined
// from this excerpt (TODO confirm against the code that reads it).
const int kSessionMaxBias = 100; |
|
|
const int kSessionMaxBias = 100; |
|
|
|
|
|
// Size, in chars, that ModelManager::LaunchKernelCheckAicpuOp gives the host-side
// op_name buffer (op_name.resize(kOpNameMaxSize)) before copying an op name back
// from the device.
// NOTE(review): that copy uses aicpu_info.opLen as both the destination capacity
// and the byte count, and no check of opLen against this constant is visible;
// an opLen >= kOpNameMaxSize would overflow the buffer or leave it without a
// terminating '\0' for the later std::string construction — verify.
const int kOpNameMaxSize = 100; |
|
|
struct CustAicpuSoBuf { |
|
|
struct CustAicpuSoBuf { |
|
|
uint64_t kernelSoBuf; |
|
|
uint64_t kernelSoBuf; |
|
|
uint32_t kernelSoBufLen; |
|
|
uint32_t kernelSoBufLen; |
|
|
@@ -1540,7 +1541,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op |
|
|
std::vector<ReturnCode> res_ret_code_list; |
|
|
std::vector<ReturnCode> res_ret_code_list; |
|
|
|
|
|
|
|
|
if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) { |
|
|
if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) { |
|
|
GELOGI("No need to check aicpu optype."); |
|
|
|
|
|
|
|
|
GELOGI("No need to check aicpu op type."); |
|
|
return SUCCESS; |
|
|
return SUCCESS; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@@ -1624,7 +1625,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op |
|
|
op_check_info_req.offSetLen = sizeof(SysOpCheckInfo); |
|
|
op_check_info_req.offSetLen = sizeof(SysOpCheckInfo); |
|
|
op_check_info_req.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list)); |
|
|
op_check_info_req.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list)); |
|
|
|
|
|
|
|
|
op_check_info_res.opListNum = op_nums; |
|
|
|
|
|
|
|
|
op_check_info_res.opListNum = 0; |
|
|
op_check_info_res.returnCodeList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list)); |
|
|
op_check_info_res.returnCodeList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list)); |
|
|
op_check_info_res.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list)); |
|
|
op_check_info_res.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list)); |
|
|
|
|
|
|
|
|
@@ -1636,7 +1637,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op |
|
|
} |
|
|
} |
|
|
allocated_mem.push_back(args); |
|
|
allocated_mem.push_back(args); |
|
|
GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); |
|
|
GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); |
|
|
GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); |
|
|
|
|
|
|
|
|
GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen), sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); |
|
|
|
|
|
|
|
|
GE_CHK_RT(rtStreamCreate(&stream, 0)); |
|
|
GE_CHK_RT(rtStreamCreate(&stream, 0)); |
|
|
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); |
|
|
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); |
|
|
@@ -1648,8 +1649,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Check the response |
|
|
// Check the response |
|
|
void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen); |
|
|
|
|
|
op_check_info_res = {}; |
|
|
|
|
|
|
|
|
void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen); |
|
|
GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
std::function<void()> callback = [&]() { |
|
|
std::function<void()> callback = [&]() { |
|
|
for (auto mem : allocated_mem) { |
|
|
for (auto mem : allocated_mem) { |
|
|
@@ -1658,25 +1658,40 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op |
|
|
GE_CHK_RT(rtStreamDestroy(stream)); |
|
|
GE_CHK_RT(rtStreamDestroy(stream)); |
|
|
}; |
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
if (op_check_info_res.isWithoutJson) { |
|
|
|
|
|
GELOGI("No need to check aicpu in this scenoria."); |
|
|
|
|
|
GE_MAKE_GUARD(release, callback); |
|
|
|
|
|
return SUCCESS; |
|
|
|
|
|
} |
|
|
uint64_t res_op_nums = op_check_info_res.opListNum; |
|
|
uint64_t res_op_nums = op_check_info_res.opListNum; |
|
|
|
|
|
GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums); |
|
|
if (res_op_nums != 0) { |
|
|
if (res_op_nums != 0) { |
|
|
|
|
|
res_ret_code_list.clear(); |
|
|
|
|
|
res_ret_code_list.resize(res_op_nums); |
|
|
|
|
|
res_aicpu_op_info_list.clear(); |
|
|
|
|
|
res_aicpu_op_info_list.resize(res_op_nums); |
|
|
GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)), sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)), sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { |
|
|
if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { |
|
|
GELOGE(FAILED, "Number of retcode is not equal to number of op type."); |
|
|
GELOGE(FAILED, "Number of retcode is not equal to number of op type."); |
|
|
GE_MAKE_GUARD(release, callback); |
|
|
GE_MAKE_GUARD(release, callback); |
|
|
|
|
|
return FAILED; |
|
|
} |
|
|
} |
|
|
std::string fail_reason = "Check aicpu op_type failed. details: "; |
|
|
|
|
|
|
|
|
std::string fail_reason; |
|
|
for (uint32_t i = 0; i < res_op_nums; i++) { |
|
|
for (uint32_t i = 0; i < res_op_nums; i++) { |
|
|
ReturnCode ret_code = res_ret_code_list.at(i); |
|
|
ReturnCode ret_code = res_ret_code_list.at(i); |
|
|
SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); |
|
|
SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); |
|
|
|
|
|
GELOGI("Not surpport aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code); |
|
|
std::vector<char> op_name; |
|
|
std::vector<char> op_name; |
|
|
|
|
|
op_name.clear(); |
|
|
|
|
|
op_name.resize(kOpNameMaxSize); |
|
|
GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); |
|
|
std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; |
|
|
std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; |
|
|
string op_name_str(op_name.data()); |
|
|
string op_name_str(op_name.data()); |
|
|
fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " " + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> not support."; |
|
|
|
|
|
|
|
|
fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " ret code:" + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n"; |
|
|
} |
|
|
} |
|
|
GELOGE(FAILED, "%s", fail_reason.c_str()); |
|
|
|
|
|
|
|
|
fail_reason += "not support."; |
|
|
|
|
|
GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); |
|
|
GE_MAKE_GUARD(release, callback); |
|
|
GE_MAKE_GUARD(release, callback); |
|
|
return FAILED; |
|
|
return FAILED; |
|
|
} |
|
|
} |
|
|
|