Browse Source

!1332 ge geloge and report error

Merge pull request !1332 from ldy2021/master
tags/v1.3.0
计晨 Gitee 3 years ago
parent
commit
f5eded92c3
15 changed files with 546 additions and 304 deletions
  1. +15
    -12
      ge/ir_build/atc_ir_common.cc
  2. +4
    -2
      ge/ir_build/attr_options/keep_dtype_option.cc
  3. +6
    -3
      ge/ir_build/attr_options/weight_compress_option.cc
  4. +65
    -60
      ge/offline/main.cc
  5. +42
    -29
      ge/offline/single_op_parser.cc
  6. +77
    -28
      ge/session/inner_session.cc
  7. +80
    -22
      ge/session/session_manager.cc
  8. +41
    -26
      ge/single_op/single_op.cc
  9. +9
    -4
      ge/single_op/single_op_manager.cc
  10. +53
    -19
      ge/single_op/single_op_model.cc
  11. +18
    -11
      ge/single_op/stream_resource.cc
  12. +13
    -9
      ge/single_op/task/aicpu_kernel_task_builder.cc
  13. +14
    -9
      ge/single_op/task/aicpu_task_builder.cc
  14. +57
    -46
      ge/single_op/task/op_task.cc
  15. +52
    -24
      ge/single_op/task/tbe_task_builder.cc

+ 15
- 12
ge/ir_build/atc_ir_common.cc View File

@@ -135,6 +135,7 @@ bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map
if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) {
GELOGE(ge::PARAM_INVALID,
"[Check][DynamicImagesizeInputShape] input_format [%s] invalid, can not support now.", input_format.c_str());
REPORT_INPUT_ERROR("E10414", std::vector<std::string>({"input_format"}), std::vector<std::string>({input_format}));
return false;
}
int32_t size = 0;
@@ -144,8 +145,7 @@ bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map
if (shape.size() != DIM_DEFAULT_SIZE) {
if (std::count(shape.begin(), shape.end(), kDynamicInputDim) > 0) {
ErrorManager::GetInstance().ATCReportErrMessage("E10019");
GELOGE(ge::PARAM_INVALID,
"[Check][DynamicImagesizeInputShape] --input_shape invalid,"
GELOGE(ge::PARAM_INVALID, "[Check][DynamicImagesizeInputShape] --input_shape invalid,"
" only height and width can be -1 when set --dynamic_image_size.");
return false;
}
@@ -164,8 +164,7 @@ bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map
}
if (size == 0) {
ErrorManager::GetInstance().ATCReportErrMessage("E10019");
GELOGE(ge::PARAM_INVALID,
"[Check][DynamicImagesizeInputShape]--input shape invalid, "
GELOGE(ge::PARAM_INVALID, "[Check][DynamicImagesizeInputShape]--input shape invalid, "
"only height and width can be -1 when set --dynamic_image_size.");
return false;
}
@@ -223,7 +222,7 @@ bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_m
}

if (!CheckAndParseDynamicDims(dynamic_dim, dynamic_dims)) {
GELOGE(ge::PARAM_INVALID, "[CheckAndParse][DynamicDims]: %s failed.", dynamic_dims.c_str());
GELOGE(ge::PARAM_INVALID, "[CheckAndParse][DynamicDims]failed, %s invalid.", dynamic_dims.c_str());
return false;
}

@@ -344,7 +343,7 @@ bool ParseSingleShapeRange(std::string &shape_range, vector<pair<int64_t, int64_
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"},
{shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample3});
GELOGE(PARAM_INVALID,
"Parse input parameter [--input_shape_range]'s shape range[%s] failed,"
"[Parse][InputParameter] [--input_shape_range]'s shape range[%s] failed,"
"reason: %s, correct sample is %s.",
shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample3);
return false;
@@ -388,7 +387,7 @@ bool ParseInputShapeRange(const std::string &shape_range,
string shape_range_str = shape_range_pair_vec[1];
vector<pair<int64_t, int64_t>> shape_range_val;
if (!ParseSingleShapeRange(shape_range_str, shape_range_val)) {
GELOGE(PARAM_INVALID, "[Parse][Param] shape_range_str: %s invalid.", shape_range_str.c_str());
GELOGE(PARAM_INVALID, "[Parse][Parameter] shape_range_str: %s invalid.", shape_range_str.c_str());
return false;
}
shape_range_map.emplace(make_pair(StringUtils::Trim(shape_range_pair_vec[0]), shape_range_val));
@@ -405,7 +404,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
ErrorManager::GetInstance().ATCReportErrMessage("E10009", {"parameter0", "parameter1", "parameter2"},
{"dynamic_batch_size", "dynamic_image_size", "dynamic_dims"});
GELOGE(ge::PARAM_INVALID,
"[Parse][Param]dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one");
"[Parse][Parameter]dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one");
return ge::PARAM_INVALID;
}

@@ -413,7 +412,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
if (!input_shape_range.empty()) {
std::map<string, std::vector<std::pair<int64_t, int64_t>>> shape_range_map;
if (!ParseInputShapeRange(input_shape_range, shape_range_map)) {
GELOGE(ge::PARAM_INVALID, "Failed to parse input shape range: %s", input_shape_range.c_str());
GELOGE(ge::PARAM_INVALID, "[Parse][InputShapeRange] failed, range: %s", input_shape_range.c_str());
return ge::PARAM_INVALID;
}
}
@@ -425,7 +424,8 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
is_dynamic_input = true;
if (input_shape.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"input_shape"});
GELOGE(ge::PARAM_INVALID, "[Check][Param]The input_shape can not be empty in dynamic input size scenario.");
GELOGE(ge::PARAM_INVALID,
"[Check][Parameter:input_shape]The input_shape can not be empty in dynamic input size scenario.");
return ge::PARAM_INVALID;
}

@@ -572,7 +572,7 @@ Status CheckCompressWeightParamValid(const std::string enable_compress_weight,
const std::string compress_weight_conf) {
if ((!compress_weight_conf.empty()) &&
(!CheckInputPathValid(compress_weight_conf, "--compress_weight_conf"))) {
GELOGE(ge::PARAM_INVALID, "[Check][CompressWeight]compress weight config file not found, file_name:%s",
GELOGE(ge::PARAM_INVALID, "[Check][InputPath]compress weight config file not found, file_name:%s",
compress_weight_conf.c_str());
return ge::PARAM_INVALID;
}
@@ -580,7 +580,7 @@ Status CheckCompressWeightParamValid(const std::string enable_compress_weight,
ErrorManager::GetInstance().ATCReportErrMessage(
"E10005", {"parameter", "value"}, {"enable_compress_weight", enable_compress_weight});
GELOGE(ge::PARAM_INVALID,
"[Check][CompressWeight]Input parameter[--enable_compress_weight]'s value[%s] must be true or false.",
"[Check][Param:enable_compress_weight]Input parameter[--enable_compress_weight]'s value:%s must be true or false.",
enable_compress_weight.c_str());
return ge::PARAM_INVALID;
}
@@ -623,10 +623,13 @@ int CheckLogParamValidAndSetLogLevel(const std::string log) {
} else {
GELOGE(ge::PARAM_INVALID,
"[Check][LogParam]log:%s invalid, only support debug, info, warning, error, null", log.c_str());
REPORT_INPUT_ERROR("E10417", std::vector<std::string>({"loglevel"}), std::vector<std::string>({log}));
return ret;
}
if (ret != 0) {
GELOGE(ge::PARAM_INVALID, "[Set][LogLevel] fail, level:%s.",log.c_str());
REPORT_INPUT_ERROR("E10417", std::vector<std::string>({"loglevel"}), std::vector<std::string>({log}));

}
return ret;
}


+ 4
- 2
ge/ir_build/attr_options/keep_dtype_option.cc View File

@@ -60,12 +60,14 @@ graphStatus KeepDtypeFunc(ComputeGraphPtr &graph, const std::string &cfg_path) {
}
std::string real_path = RealPath(cfg_path.c_str());
if (real_path.empty()) {
GELOGE(GRAPH_PARAM_INVALID, "Can not get real path for %s.", cfg_path.c_str());
GELOGE(GRAPH_PARAM_INVALID, "[Get][Path]Can not get real path for %s.", cfg_path.c_str());
REPORT_INPUT_ERROR("E10410", std::vector<std::string>({"cfgpath"}), std::vector<std::string>({cfg_path}));
return GRAPH_PARAM_INVALID;
}
std::ifstream ifs(real_path);
if (!ifs.is_open()) {
GELOGE(GRAPH_FAILED, "Open file %s failed", cfg_path.c_str());
GELOGE(GRAPH_FAILED, "[Open][File] %s failed.", cfg_path.c_str());
REPORT_INNER_ERROR("E19999", "open file:%s failed.", cfg_path.c_str());
return GRAPH_FAILED;
}


+ 6
- 3
ge/ir_build/attr_options/weight_compress_option.cc View File

@@ -30,12 +30,14 @@ graphStatus WeightCompressFunc(ComputeGraphPtr &graph, const string &cfg_path) {
}
std::string real_path = RealPath(cfg_path.c_str());
if (real_path.empty()) {
GELOGE(GRAPH_PARAM_INVALID, "Can not get real path for %s.", cfg_path.c_str());
GELOGE(GRAPH_PARAM_INVALID, "[Get][Path]Can not get real path for %s.", cfg_path.c_str());
REPORT_INPUT_ERROR("E10410", std::vector<std::string>({"cfgpath"}), std::vector<std::string>({cfg_path}));
return GRAPH_PARAM_INVALID;
}
std::ifstream ifs(real_path);
if (!ifs.is_open()) {
GELOGE(GRAPH_FAILED, "Open file %s failed", cfg_path.c_str());
GELOGE(GRAPH_FAILED, "[Open][File] %s failed", cfg_path.c_str());
REPORT_INNER_ERROR("E19999", "open file:%s failed.", cfg_path.c_str());
return GRAPH_FAILED;
}
@@ -55,7 +57,8 @@ graphStatus WeightCompressFunc(ComputeGraphPtr &graph, const string &cfg_path) {
if ((op_desc->GetName() == compress_node_vec[i]) || IsOriginalOpFind(op_desc, compress_node_vec[i])) {
is_find = true;
if (!ge::AttrUtils::SetBool(op_desc, ge::ATTR_NAME_COMPRESS_WEIGHT, true)) {
GELOGE(GRAPH_FAILED, "node %s SetBool failed.", compress_node_vec[i].c_str());
GELOGE(GRAPH_FAILED, "[Set][Bool] failed, node:%s.", compress_node_vec[i].c_str());
REPORT_CALL_ERROR("E19999", "SetBool failed, node:%s.", compress_node_vec[i].c_str());
return GRAPH_FAILED;
}
}


+ 65
- 60
ge/offline/main.cc View File

@@ -341,10 +341,10 @@ class GFlagUtils {
static Status CheckDumpInfershapeJsonFlags() {
Status ret = CheckFrameWorkValid(FLAGS_framework, FLAGS_weight);
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED,
"check custom aicpu run so failed!");
"[Check][Param:FrameWork]%d value is invalid.", FLAGS_framework);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_weight != "" && !ge::CheckInputPathValid(FLAGS_weight, "--weight"),
return domi::FAILED, "Input parameter[--weight]'s value[%s] is invalid!",
return domi::FAILED, "[Check][Param:weight]value:%s: is invalid, path can not reach.",
FLAGS_weight.c_str());
return domi::SUCCESS;
}
@@ -355,34 +355,34 @@ class GFlagUtils {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_model == "",
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"model"});
ret = ge::FAILED, "Input parameter[--model]'s value is empty!");
ret = ge::FAILED, "[Check][Param]Input parameter[--model]'s value is empty!");

// check param disable_reuse_memory
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
ge::CheckDisableReuseMemoryParamValid(to_string(FLAGS_disable_reuse_memory)) != ge::SUCCESS,
ret = ge::FAILED, "check disable_reuse_memory failed!");
ret = ge::FAILED, "[Check][DisableReuseMemory]failed!");

// check optypelist_for_implmode and op_select_implmode
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
ge::CheckImplmodeParamValid(FLAGS_optypelist_for_implmode,
FLAGS_op_select_implmode) != ge::SUCCESS,
ret = ge::FAILED, "check optypelist_for_implmode and op_select_implmode failed!");
ret = ge::FAILED, "[Check][ImplMode]check optypelist_for_implmode and op_select_implmode failed!");
// No output file information passed in
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_mode == GEN_OM_MODEL && FLAGS_output == "",
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"output"});
ret = ge::FAILED, "Input parameter[--output]'s value is empty!");
ret = ge::FAILED, "[Check][Param]Input parameter[--output]'s value is empty!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
CheckFrameWorkValid(FLAGS_framework, FLAGS_weight) != ge::SUCCESS,
ret = ge::FAILED,
"CheckFrameWorkValid failed");
"[Check][FrameWork] failed for input --FLAGS_framework and --FLAGS_weight invalid.");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
ge::CheckDynamicInputParamValid(FLAGS_dynamic_batch_size, FLAGS_dynamic_image_size,
FLAGS_dynamic_dims, FLAGS_input_shape, FLAGS_input_shape_range,
FLAGS_input_format, is_dynamic_input) != ge::SUCCESS,
ret = ge::FAILED, "check dynamic size(batch size, image size or dims) failed!");
ret = ge::FAILED, "[Check][DynamicInput]dynamic size(batch size, image size or dims) invalid!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!FLAGS_insert_op_conf.empty() && !FLAGS_dynamic_dims.empty(),
@@ -390,26 +390,26 @@ class GFlagUtils {
{"parameter", "value", "reason"},
{"--insert_op_conf", FLAGS_insert_op_conf,
"dynamic dims function does not support aipp"});
ret = ge::FAILED, "dynamic dims function does not support aipp");
ret = ge::FAILED, "[Check][Param]dynamic dims function does not support aipp");

#if !defined(__ANDROID__) && !defined(ANDROID)
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!CheckEncryptModeValid(FLAGS_encrypt_mode), ret = ge::FAILED,
"encrypt_mode %d not valid!!", FLAGS_encrypt_mode);
"[Check][EncryptMode]value %d not valid!!", FLAGS_encrypt_mode);

if (FLAGS_encrypt_mode == 0) { // Encryption mode
GELOGI("ge will run with encrypt!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_encrypt_key), ret = ge::FAILED,
"encrypt_key file not found!!");
"[Check][InputPath]encrypt_key file not found!!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_certificate), ret = ge::FAILED,
"certificate file not found!!");
"[Check][InputPath]certificate file not found!!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_hardware_key), ret = ge::FAILED,
"hardware_key file not found!!");
"[Check][InputPath]hardware_key file not found!!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_private_key), ret = ge::FAILED,
"private_key file not found!!");
"[Check][InputPath]private_key file not found!!");
} else { // No encryption
GELOGI("ge will run without encrypt!");
}
@@ -420,41 +420,41 @@ class GFlagUtils {
*/
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_model != "" && !ge::CheckInputPathValid(FLAGS_model, "--model"), ret = ge::FAILED,
"model file %s not found!!", FLAGS_model.c_str());
"[Check][InputPath]model file %s not found!!", FLAGS_model.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_weight != "" && !ge::CheckInputPathValid(FLAGS_weight, "--weight"),
ret = ge::FAILED, "weight file %s not found!!",
ret = ge::FAILED, "[Check][InputPath]weight file %s not found!!",
FLAGS_weight.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_cal_conf != "" && !ge::CheckInputPathValid(FLAGS_cal_conf, "--cal_conf"),
ret = ge::FAILED, "calibration config file %s not found!!",
ret = ge::FAILED, "[Check][InputPath]calibration config file %s not found!!",
FLAGS_cal_conf.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_op_name_map != "" && !ge::CheckInputPathValid(FLAGS_op_name_map, "--op_name_map"),
ret = ge::FAILED, "op config file %s not found!!",
ret = ge::FAILED, "[Check][InputPath]op config file %s not found!!",
FLAGS_op_name_map.c_str());

GE_CHK_BOOL_EXEC(ge::CheckInsertOpConfParamValid(std::string(FLAGS_insert_op_conf)) == ge::SUCCESS,
ret = ge::FAILED, "check insert op conf failed!");
ret = ge::FAILED, "[Check][InsertOpConf]failed!");

GE_CHK_BOOL_EXEC(ge::CheckCompressWeightParamValid(
FLAGS_enable_compress_weight, FLAGS_compress_weight_conf) == ge::SUCCESS,
ret = ge::FAILED, "check compress weight failed!");
ret = ge::FAILED, "[Check][CompressWeight]failed!");

GE_CHK_BOOL_EXEC(ge::CheckKeepTypeParamValid(FLAGS_keep_dtype) == ge::SUCCESS,
ret = ge::FAILED, "check keep dtype failed!");
ret = ge::FAILED, "[Check][KeepType]failed!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!ge::CheckOutputPathValid(FLAGS_check_report, "--check_report"), ret = ge::FAILED,
"check_report file %s not found!!", FLAGS_check_report.c_str());
"[Check][OutputPath]]check_report file %s not found!!", FLAGS_check_report.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_mode == GEN_OM_MODEL && FLAGS_output != "" &&
(!ge::CheckOutputPathValid(FLAGS_output, "--output") || !CheckPathWithName(FLAGS_output)),
ret = ge::FAILED, "output path %s is not valid!!", FLAGS_output.c_str());
ret = ge::FAILED, "[Check][OutputPath]output path %s is not valid!!", FLAGS_output.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_save_original_model != "" &&
@@ -463,18 +463,18 @@ class GFlagUtils {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10005", {"parameter", "value"}, {"save_original_model", FLAGS_save_original_model});
ret = ge::FAILED,
"Input parameter[--save_original_model]'s value[%s] must be true or false.",
"[Check][Parameter]Input parameter[--save_original_model]'s value[%s] must be true or false.",
FLAGS_save_original_model.c_str());
GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(FLAGS_buffer_optimize) == ge::SUCCESS,
ret = ge::FAILED, "check output type failed!");
ret = ge::FAILED, "[Check][BufferOptimize]check output type failed!");

GE_CHK_BOOL_EXEC(
ge::CheckEnableSingleStreamParamValid(std::string(FLAGS_enable_single_stream)) == ge::SUCCESS,
ret = ge::FAILED, "check enable single stream failed!");
ret = ge::FAILED, "[Check][EnableSingleStream]failed!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((FLAGS_display_model_info != "0") && (FLAGS_display_model_info != "1"),
ErrorManager::GetInstance().ATCReportErrMessage("E10006", {"parameter"}, {"display_model_info"});
ret = ge::FAILED, "Input parameter[--display_model_info]'s value must be 1 or 0.");
ret = ge::FAILED, "[Check][Parameter]Input parameter[--display_model_info]'s value must be 1 or 0.");

return ret;
}
@@ -491,25 +491,25 @@ class GFlagUtils {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_om == "",
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"om"});
ret = ge::FAILED,
"Input parameter[--om]'s value is empty!!");
"[Check][Parameter]Input parameter[--om]'s value is empty!!");

// JSON path not passed in
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_json == "",
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"json"});
ret = ge::FAILED,
"Input parameter[--json]'s value is empty!!");
"[Check][Parameter]Input parameter[--json]'s value is empty!!");

// Check if the model path is valid
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_om != "" && !ge::CheckInputPathValid(FLAGS_om, "--om"),
ret = ge::FAILED,
"model file path is invalid: %s.", FLAGS_om.c_str());
"[Check][InputPath]model file path is invalid: %s.", FLAGS_om.c_str());

// Check whether the JSON path is valid
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_json != "" && !ge::CheckOutputPathValid(FLAGS_json, "--json"),
ret = ge::FAILED,
"json file path is invalid: %s.", FLAGS_json.c_str());
"[Check][OutputPath]json file path is invalid: %s.", FLAGS_json.c_str());

return ret;
}
@@ -574,7 +574,8 @@ class GFlagUtils {
if (fileName.size() > static_cast<int>(PATH_MAX)) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10021", {"parameter", "size"}, {"output", std::to_string(PATH_MAX)});
GELOGE(ge::FAILED, "Input parameter[--output]'s path is too long, it must be less than %d", PATH_MAX);
GELOGE(ge::FAILED,
"[Check][Path]Input parameter[--output]'s path is too long, it must be less than %d", PATH_MAX);
return false;
}

@@ -632,8 +633,8 @@ static bool CheckInputFormat() {
// only support NCHW ND
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--input_format", FLAGS_input_format, kCaffeFormatSupport});
GELOGE(ge::FAILED,
"Invalid value for --input_format[%s], %s.", FLAGS_input_format.c_str(), kCaffeFormatSupport);
GELOGE(ge::FAILED, "[Check][InputFormat]Invalid value for --input_format[%s], %s.",
FLAGS_input_format.c_str(), kCaffeFormatSupport);
return false;
} else if ((FLAGS_framework == static_cast<int32_t>(domi::TENSORFLOW))) { // tf
if (ge::tf_support_input_format.find(FLAGS_input_format) != ge::tf_support_input_format.end()) {
@@ -642,8 +643,8 @@ static bool CheckInputFormat() {
// only support NCHW NHWC ND NCDHW NDHWC
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--input_format", FLAGS_input_format, kTFFormatSupport});
GELOGE(ge::FAILED,
"Invalid value for --input_format[%s], %s.", FLAGS_input_format.c_str(), kTFFormatSupport);
GELOGE(ge::FAILED, "[Check][InputFormat]Invalid value for --input_format[%s], %s.",
FLAGS_input_format.c_str(), kTFFormatSupport);
return false;
} else if (FLAGS_framework == static_cast<int32_t>(domi::ONNX)) {
if (ge::onnx_support_input_format.find(FLAGS_input_format) != ge::onnx_support_input_format.end()) {
@@ -652,8 +653,8 @@ static bool CheckInputFormat() {
// only support NCHW ND
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--input_format", FLAGS_input_format, kONNXFormatSupport});
GELOGE(ge::FAILED,
"Invalid value for --input_format[%s], %s.", FLAGS_input_format.c_str(), kONNXFormatSupport);
GELOGE(ge::FAILED, "[Check][InputFormat]Invalid value for --input_format[%s], %s.",
FLAGS_input_format.c_str(), kONNXFormatSupport);
return false;
}
return true;
@@ -846,11 +847,11 @@ Status CreateInputsForInference(const ge::Graph &graph, vector<ge::GeTensor> &in

domi::Status GenerateInfershapeJson() {
if (!CheckInputFormat()) {
GELOGE(ge::FAILED, "Check input_format failed");
GELOGE(ge::FAILED, "[Check][InputFormat] failed.");
return domi::FAILED;
}
Status ret = GFlagUtils::CheckDumpInfershapeJsonFlags();
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "Check flags failed!");
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "[Check][DumpInfershapeJsonFlags] failed!");

ge::GeGenerator ge_generator;
std::map<string, string> options;
@@ -897,13 +898,14 @@ static Status ConvertModelToJson(int fwk_type, const string &model_file, const s
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"},
{"--framework", std::to_string(fwk_type), kModelToJsonSupport});
GELOGE(ge::FAILED, "Invalid value for --framework[%d], %s.", fwk_type, kModelToJsonSupport);
GELOGE(ge::FAILED, "[Convert][ModelToJson]Invalid value for --framework[%d], %s.",
fwk_type, kModelToJsonSupport);
ret = ge::FAILED;
}

if (FLAGS_dump_mode != "0" && FLAGS_dump_mode != "1") {
ErrorManager::GetInstance().ATCReportErrMessage("E10006", {"parameter"}, {"dump_mode"});
GELOGE(ge::FAILED, "Input parameter[--dump_mode]'s value must be 1 or 0.");
GELOGE(ge::FAILED, "[Convert][ModelToJson] Input parameter[--dump_mode]'s value must be 1 or 0.");
ret = ge::FAILED;
}

@@ -978,12 +980,13 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
graph = load_model.GetGraph();

GE_CHK_STATUS_EXEC(ge::InitDomiOmgContext(FLAGS_input_shape, FLAGS_input_format, "", is_dynamic_input),
GELOGE(ge::FAILED, "ATC Generate call InitDomiOmgContext ret fail");
GELOGE(ge::FAILED, "[Init][DomiOmgContext]ATC Generate call InitDomiOmgContext ret fail");
(void)ge_generator.Finalize(); (void)ge::GELib::GetInstance()->Finalize(); return domi::FAILED);

Status ret = CreateInputsForInference(graph, inputs);
if (ret != ge::SUCCESS) {
GELOGE(ge::FAILED, "create inputs for inference failed.");
GELOGE(ge::FAILED, "[Create][InputsForInference] failed.");
REPORT_CALL_ERROR("E19999", "CreateInputsForInference failed for input --graph and --inputs.");
(void)ge_generator.Finalize();
(void)ge::GELib::GetInstance()->Finalize();
return domi::FAILED;
@@ -1085,7 +1088,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {
// check optypelist_for_implmode and op_select_implmode
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
ge::CheckImplmodeParamValid(FLAGS_optypelist_for_implmode, FLAGS_op_select_implmode) != ge::SUCCESS,
return ge::FAILED, "check optypelist_for_implmode and op_select_implmode failed!");
return ge::FAILED, "[Check][ImplmodeParam] fail for input optypelist_for_implmode and op_select_implmode.");

std::map<string, string> options;
// need to be changed when ge.ini plan is done
@@ -1138,12 +1141,12 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {

domi::Status GenerateOmModel() {
if (!CheckInputFormat()) {
GELOGE(ge::FAILED, "Check input_format failed");
GELOGE(ge::FAILED, "[Check][InputFormat]failed.");
return domi::FAILED;
}
Status ret = GFlagUtils::CheckFlags();
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED,
"Check flags failed! Please check whether some atc params that include semicolons[;] use double "
"[Check][Flags] failed! Please check whether some atc params that include semicolons[;] use double "
"quotation marks (\") to enclose each argument such as out_nodes, input_shape, dynamic_image_size");
#if !defined(__ANDROID__) && !defined(ANDROID)
// Load custom operator Library
@@ -1151,7 +1154,7 @@ domi::Status GenerateOmModel() {

SaveCustomCaffeProtoPath();

GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!");
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "[Check][Flags]check custom aicpu run so failed!");
#endif

const int f_stream_num = 1;
@@ -1250,7 +1253,7 @@ domi::Status GenerateOmModel() {
domi::Status ConvertModelToJson() {
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
Status ret = GFlagUtils::CheckConverJsonParamFlags();
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "Check convert json params flags failed!");
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "[CheckConver][JsonParamFlags] failed!");

ret = ConvertModelToJson(FLAGS_framework, FLAGS_om, FLAGS_json);

@@ -1264,13 +1267,13 @@ domi::Status DisplayModelInfo() {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_om == "",
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"om"});
return ge::FAILED,
"Input parameter[--om]'s value is empty!!");
"[Check][Parameter]Input parameter[--om]'s value is empty!!");

// Check if the model path is valid
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
FLAGS_om != "" && !ge::CheckInputPathValid(FLAGS_om, "--om"),
return ge::FAILED,
"model file path is invalid: %s.", FLAGS_om.c_str());
"[Check][InputPath]model file path is invalid: %s.", FLAGS_om.c_str());

if (FLAGS_framework == -1) {
return ge::ConvertOm(FLAGS_om.c_str(), "", false);
@@ -1311,13 +1314,15 @@ domi::Status ConvertPbtxtToJson() {
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
Status ret = GFlagUtils::CheckConverJsonParamFlags();
if (ret != domi::SUCCESS) {
GELOGE(ge::FAILED, "Check convert json params flags failed!");
GELOGE(ge::FAILED, "[CheckConver][JsonParamFlags] failed!");
return domi::FAILED;
}

ret = ge::ConvertPbtxtToJson(FLAGS_om.c_str(), FLAGS_json.c_str());
if (ret != domi::SUCCESS) {
GELOGE(ge::FAILED, "ConvertPbtxtToJson fail.");
GELOGE(ge::FAILED, "[Convert][PbtxtToJson] fail.");
REPORT_CALL_ERROR("E19999", "ConvertPbtxtToJson failed, FLAGS_om:%s, FLAGS_json:%s.",
FLAGS_om.c_str(), FLAGS_json.c_str());
return domi::FAILED;
}

@@ -1386,8 +1391,8 @@ bool CheckMemInfo() {
GELOGI("Get mem available [%lu kB].", current_mem_available);
std::cout << "Current available mem is " << current_mem_available << "kB." << std::endl;
if ((current_mem_available > 0) && (current_mem_available < kMinAvailableMem)) {
GELOGE(ge::PARAM_INVALID, "Current available mem [%lu kB] can not be smaller than [%lu kB] .",
current_mem_available, kMinAvailableMem);
GELOGE(ge::PARAM_INVALID, "[Check][MemSize]Current available mem [%lu kB] can not be smaller than [%lu kB] .",
current_mem_available, kMinAvailableMem);
ErrorManager::GetInstance().ATCReportErrMessage("E10044", {"value", "min_value"},
{to_string(current_mem_available), to_string(kMinAvailableMem)});
return false;
@@ -1407,7 +1412,7 @@ int main(int argc, char* argv[]) {
}
do {
if (!CheckMemInfo()) {
GELOGE(ge::PARAM_INVALID, "Current available mem is too small");
GELOGE(ge::PARAM_INVALID, "[Check][MemInfo]Current available mem is too small.");
ret = domi::FAILED;
break;
}
@@ -1421,17 +1426,17 @@ int main(int argc, char* argv[]) {
GE_IF_BOOL_EXEC(GenerateOmModel() != domi::SUCCESS, ret = domi::FAILED; break);
} else if (MODEL_TO_JSON == FLAGS_mode) { // Mode 1, transfer model to JSON
GE_CHK_BOOL_EXEC(ConvertModelToJson() == domi::SUCCESS, ret = domi::FAILED;
break, "ATC ConvertJson execute failed!!");
break, "[Convert][ModelToJson]ATC ConvertJson execute failed!!");
} else if (FLAGS_mode == ge::RunMode::PBTXT_TO_JSON) {
GE_CHK_BOOL_EXEC(ConvertPbtxtToJson() == domi::SUCCESS, ret = domi::FAILED;
break, "ATC convert pbtxt to json execute failed!!");
break, "[Convert][PbtxtToJson]ATC convert pbtxt to json execute failed!!");
} else if (FLAGS_mode == ge::RunMode::DISPLAY_OM_INFO) {
GE_CHK_BOOL_EXEC(DisplayModelInfo() == domi::SUCCESS, ret = domi::FAILED;
break, "ATC DisplayModelInfo failed!!");
break, "[Display][ModelInfo]ATC DisplayModelInfo failed!!");
} else {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--mode", std::to_string(FLAGS_mode), kModeSupport});
GELOGE(ge::PARAM_INVALID, "Invalid value for --mode[%d], %s.", FLAGS_mode, kModeSupport);
GELOGE(ge::PARAM_INVALID, "[Check][Parameter]Invalid value for --mode[%d], %s.", FLAGS_mode, kModeSupport);
ret = domi::FAILED;
break;
}


+ 42
- 29
ge/offline/single_op_parser.cc View File

@@ -217,7 +217,10 @@ void from_json(const Json &j, SingleOpAttr &attr) {
attr.type = j.at(kKeyType).get<string>();
auto it = kAttrTypeDict.find(attr.type);
if (it == kAttrTypeDict.end()) {
GELOGE(UNSUPPORTED, "Parse attr[%s] failed. Unsupported type: %s", attr.name.c_str(), attr.type.c_str());
GELOGE(UNSUPPORTED, "[Find][JsonAttr] name=%s, type=%s failed for Unsupported type.",
attr.name.c_str(), attr.type.c_str());
REPORT_INNER_ERROR("E19999", "Find jsonattr name=%s, type=%s failed for Unsupported type.",
attr.name.c_str(), attr.type.c_str());
return;
}

@@ -253,7 +256,10 @@ void from_json(const Json &j, SingleOpAttr &attr) {
SetAttrValue<DataType>(j, attr);
break;
default:
GELOGE(UNSUPPORTED, "Parse attr[%s] failed. Unsupported type: %s", attr.name.c_str(), attr.type.c_str());
GELOGE(UNSUPPORTED, "[Find][JsonAttr] name=%s, type=%s failed for Unsupported type.",
attr.name.c_str(), attr.type.c_str());
REPORT_INNER_ERROR("E19999", "Find jsonattr name=%s, type=%s failed for Unsupported type.",
attr.name.c_str(), attr.type.c_str());
break;
}
}
@@ -281,22 +287,23 @@ Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) {
std::string real_path = RealPath(file.c_str());
if (real_path.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10023", {"value"}, {file});
GELOGE(FAILED, "Input parameter[--singleop]'s value[%s] is not a valid path.", file.c_str());
GELOGE(FAILED, "[Read][JsonFile]Input parameter[--singleop]'s value[%s] is not a valid path.", file.c_str());
return INTERNAL_ERROR;
}

std::ifstream ifs(real_path);
if (!ifs.is_open()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10024", {"value"}, {file});
GELOGE(FAILED, "Open file[%s] provided in input parameter[--singleop] failed.", file.c_str());
GELOGE(FAILED, "[Open][JsonFile] failed for file[%s] provided in input parameter[--singleop].", file.c_str());
return FAILED;
}
try {
ifs >> json_obj;
} catch (const std::exception &e) {
ErrorManager::GetInstance().ATCReportErrMessage("E10025", {"realpath", "errmsg"}, {real_path, e.what()});
GELOGE(PARAM_INVALID, "Parse file[%s] provided in input parameter[--singleop] failed, exception = %s.",
real_path.c_str(), e.what());
GELOGE(PARAM_INVALID,
"[Parse][JsonFile] fail for file[%s] provided in input parameter[--singleop], exception = %s.",
real_path.c_str(), e.what());
return PARAM_INVALID;
}

@@ -307,7 +314,7 @@ Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) {
bool SingleOpParser::Validate(const SingleOpDesc &op_desc) {
if (op_desc.op.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10026");
GELOGE(PARAM_INVALID, "Op name is empty");
GELOGE(PARAM_INVALID, "[Check][Param] fail for name of input SingleOpDesc is empty.");
return false;
}

@@ -316,14 +323,15 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) {
if (!tensor_desc.GetValidFlag()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"intput", "datatype or format", std::to_string(index)});
GELOGE(PARAM_INVALID, "Input's dataType or format is invalid when the index is %d", index);
GELOGE(PARAM_INVALID,
"[Check][Param] fail for Input's dataType or format is invalid when the index is %d", index);
return false;
}
if ((tensor_desc.type == DT_UNDEFINED && tensor_desc.format != FORMAT_RESERVED) ||
(tensor_desc.type != DT_UNDEFINED && tensor_desc.format == FORMAT_RESERVED)){
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"intput", "datatype or format", std::to_string(index)});
GELOGE(PARAM_INVALID, "Input's dataType or format is invalid when the index is %d", index);
GELOGE(PARAM_INVALID, "[Check][Param]Input's dataType or format is invalid when the index is %d", index);
return false;
}
++index;
@@ -334,20 +342,20 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) {
if (!tensor_desc.GetValidFlag()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"output", "datatype", std::to_string(index)});
GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index);
GELOGE(PARAM_INVALID, "[Check][Param]fail for Output's dataType is invalid when the index is %d", index);
return false;
}
if (tensor_desc.type == DT_UNDEFINED) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"output", "datatype", std::to_string(index)});
GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index);
GELOGE(PARAM_INVALID, "[Check][Param]Output's dataType is invalid when the index is %d", index);
return false;
}

if (tensor_desc.format == FORMAT_RESERVED) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"},
{"output", "format", std::to_string(index)});
GELOGE(PARAM_INVALID, "Output's format is invalid when the index is %d", index);
GELOGE(PARAM_INVALID, "[Check][Param]Output's format is invalid when the index is %d", index);
return false;
}
++index;
@@ -356,13 +364,13 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) {
for (auto &attr : op_desc.attrs) {
if (attr.name.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10029");
GELOGE(PARAM_INVALID, "attr name is empty");
GELOGE(PARAM_INVALID, "[Parse][Attr]attr name is empty");
return false;
}

if (attr.value.IsEmpty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10030", {"attrname"}, {attr.name});
GELOGE(PARAM_INVALID, "Parse attr \"%s\" failed. ", attr.name.c_str());
GELOGE(PARAM_INVALID, "[Parse][Attr] fail for vale of attr name:\"%s\" is empty. ", attr.name.c_str());
return false;
}
}
@@ -442,7 +450,7 @@ Status SingleOpParser::ConvertToBuildParam(int index,
}

if (VerifyOpInputOutputSizeByIr(*op_desc) != SUCCESS) {
GELOGE(PARAM_INVALID, "Verify op [%s] input or output size failed.", op_desc->GetType().c_str());
GELOGE(PARAM_INVALID, "[Verify][OpInputOutputSize] fail for input op [%s] invalid.", op_desc->GetType().c_str());
return PARAM_INVALID;
}

@@ -462,8 +470,9 @@ Status SingleOpParser::VerifyOpInputOutputSizeByIr(const OpDesc &current_op_desc
string reason = "is smaller than the ir needed input size " + std::to_string(ir_opdesc_inputs_num);
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{current_op_desc.GetName(), "input size " + std::to_string(current_opdesc_inputs_num), reason});
GELOGE(PARAM_INVALID, "This op [%s] input size %zu is smaller than the ir needed input size %zu",
current_op_desc.GetName().c_str(), current_opdesc_inputs_num, ir_opdesc_inputs_num);
GELOGE(PARAM_INVALID,
"[Verify][OpInputOutputSize]This op:%s input size %zu is smaller than the ir needed input size %zu",
current_op_desc.GetName().c_str(), current_opdesc_inputs_num, ir_opdesc_inputs_num);
return PARAM_INVALID;
}
size_t current_opdesc_outputs_num = current_op_desc.GetOutputsSize();
@@ -472,8 +481,9 @@ Status SingleOpParser::VerifyOpInputOutputSizeByIr(const OpDesc &current_op_desc
string reason = "is smaller than the ir needed output size " + std::to_string(ir_opdesc_outputs_num);
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{current_op_desc.GetName(), "output size " + std::to_string(current_opdesc_outputs_num), reason});
GELOGE(PARAM_INVALID, "This op [%s] output size %zu is smaller than the ir needed output size %zu",
current_op_desc.GetName().c_str(), current_opdesc_outputs_num, ir_opdesc_outputs_num);
GELOGE(PARAM_INVALID,
"[Verify][OpInputOutputSize]This op:%s output size %zu is smaller than the ir needed output size %zu",
current_op_desc.GetName().c_str(), current_opdesc_outputs_num, ir_opdesc_outputs_num);
return PARAM_INVALID;
}
}
@@ -492,7 +502,8 @@ Status SingleOpParser::SetShapeRange(const std::string &op_name,
{op_name,
"shape",
"has unknown rank but dim size is not one"});
GELOGE(PARAM_INVALID, "Invalid tensor shape: [%s]", ge_tensor_desc.MutableShape().ToString().c_str());
GELOGE(PARAM_INVALID, "[Set][ShapeRange]Invalid tensor shape:%s.",
ge_tensor_desc.MutableShape().ToString().c_str());
return PARAM_INVALID;
}
if (!tensor_desc.dim_ranges.empty()) {
@@ -500,7 +511,7 @@ Status SingleOpParser::SetShapeRange(const std::string &op_name,
{op_name,
"shape range",
"is not needed while the rank the shape is unknown"});
GELOGE(PARAM_INVALID, "Shape range is not needed while the rank the shape is unknown");
GELOGE(PARAM_INVALID, "[Set][ShapeRange]Shape range is not needed while the rank the shape is unknown.");
return PARAM_INVALID;
}

@@ -522,7 +533,7 @@ Status SingleOpParser::SetShapeRange(const std::string &op_name,
{op_name,
"shape range size " + std::to_string(num_shape_ranges),
reason});
GELOGE(PARAM_INVALID, "The number of shape_range mismatches that of unknown dims.");
GELOGE(PARAM_INVALID, "[Set][ShapeRange]The number of shape_range mismatches that of unknown dims.");
return PARAM_INVALID;
}

@@ -533,7 +544,8 @@ Status SingleOpParser::SetShapeRange(const std::string &op_name,
{op_name,
"shape range " + std::to_string(range_index),
reason});
GELOGE(PARAM_INVALID, "Invalid shape range entry. index = %zu, size = %zu", range_index, range.size());
GELOGE(PARAM_INVALID, "[Set][ShapeRange]Invalid shape range entry. index = %zu, size = %zu",
range_index, range.size());
return PARAM_INVALID;
}

@@ -550,9 +562,8 @@ Status SingleOpParser::SetShapeRange(const std::string &op_name,
"shape range size " + std::to_string(num_shape_ranges),
reason});
GELOGE(PARAM_INVALID,
"The number of shape_range(%zu) mismatches that of unknown dims(%zu).",
num_shape_ranges,
range_index);
"[Set][ShapeRange]The number of shape_range(%zu) mismatches that of unknown dims(%zu).",
num_shape_ranges, range_index);
return PARAM_INVALID;
}

@@ -577,12 +588,14 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str());
single_op_desc = single_op_json;
if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) {
GELOGE(FAILED, "Update dynamic tensor name failed!");
GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!");
REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param.");
return FAILED;
}

if (!Validate(single_op_desc)) {
GELOGE(PARAM_INVALID, "Validate the index[%d] of op failed when read json file[%s].", index, file.c_str());
GELOGE(PARAM_INVALID,
"[Check][OpDesc]Validate the index[%d] of op failed when read json file[%s].", index, file.c_str());
return PARAM_INVALID;
}

@@ -599,7 +612,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
} catch (const nlohmann::json::exception &e) {
ErrorManager::GetInstance().ATCReportErrMessage("E10032", {"index", "jsonfile", "exception"},
{std::to_string(index), file, e.what()});
GELOGE(PARAM_INVALID, "Parse the index[%d] of op failed when read json file[%s], exception %s",
GELOGE(PARAM_INVALID, "[Parse][OpList] the index:%d of op failed when read json file:%s, exception:%s",
index, file.c_str(), e.what());
return PARAM_INVALID;
}


+ 77
- 28
ge/session/inner_session.cc View File

@@ -47,7 +47,10 @@ Status CheckReuseMemoryOption(const std::map<string, string> &options) {
} else if (iter->second == "1") {
GELOGD("%s=1, reuse memory is close", OPTION_EXEC_DISABLE_REUSED_MEMORY);
} else {
GELOGE(PARAM_INVALID, "option %s=%s is invalid", OPTION_EXEC_DISABLE_REUSED_MEMORY, iter->second.c_str());
GELOGE(PARAM_INVALID, "[CheckReuse][MemoryOption]option %s=%s is invalid",
OPTION_EXEC_DISABLE_REUSED_MEMORY, iter->second.c_str());
REPORT_INNER_ERROR("E19999", "CheckReuseMemoryOption failed because option %s=%s is invalid.",
OPTION_EXEC_DISABLE_REUSED_MEMORY, iter->second.c_str());
return FAILED;
}
}
@@ -72,7 +75,8 @@ Status InnerSession::Initialize() {

Status ret = CheckReuseMemoryOption(all_options);
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] check reuse memory option failed.", session_id_);
GELOGE(ret, "[CheckReuse][MemoryOption] failed, [InnerSession:%lu].", session_id_);
REPORT_CALL_ERROR("E19999", "CheckReuseMemoryOption failed, InnerSession=%lu.", session_id_);
return ret;
}

@@ -99,20 +103,22 @@ Status InnerSession::Initialize() {

DumpProperties dump_properties;
dump_properties.InitByOptions();
GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed");
GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "[Add][DumpProperties] failed.");

ret = graph_manager_.Initialize(options_);
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_);
GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed");
GELOGE(ret, "[Init][GraphManager] failed, InnerSession:%lu.", session_id_);
REPORT_CALL_ERROR("E19999", "GraphManager initialize failed, InnerSession:%lu.", session_id_);
GE_CHK_STATUS(RemoveDumpProperties(), "[Remove][DumpProperties] failed.");
return ret;
}

ret = VarManager::Instance(session_id_)->SetMemoryMallocSize(all_options);
if (ret != SUCCESS) {
GELOGE(ret, "failed to set malloc size");
GELOGE(ret, "[Set][MemoryMallocSize] failed.");
REPORT_CALL_ERROR("E19999", "VarManager SetMemoryMallocSize failed, InnerSession:%lu.", session_id_);
(void)graph_manager_.Finalize();
GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed");
GE_CHK_STATUS(RemoveDumpProperties(), "[Remove][DumpProperties] failed.");
GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
return ret;
}
@@ -122,8 +128,9 @@ Status InnerSession::Initialize() {
const int DEFAULT_JOB_ID = 0;
ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID);
if (ret != SUCCESS) {
GELOGE(ret, "failed to init session instance");
GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed");
GELOGE(ret, "[Init][VarManager] failed.");
REPORT_CALL_ERROR("E19999", "VarManager init failed, InnerSession:%lu.", session_id_);
GE_CHK_STATUS(RemoveDumpProperties(), "[Remove][DumpProperties] failed.");
}
init_flag_ = true;
return SUCCESS;
@@ -139,7 +146,8 @@ Status InnerSession::Finalize() {
Status ret = graph_manager_.Finalize();
if (ret != SUCCESS) {
// Subsequent code execution is required, so no return is required
GELOGE(ret, "[InnerSession:%lu] finalize failed.", session_id_);
GELOGE(ret, "[Finalize][GraphManager] failed, InnerSession:%lu.", session_id_);
REPORT_CALL_ERROR("E19999", "GraphManager Finalize failed, InnerSession:%lu.", session_id_);
}

ModelManager::GetInstance()->DestroyAicpuSession(session_id_);
@@ -151,7 +159,7 @@ Status InnerSession::Finalize() {
Analyzer::GetInstance()->DestroySessionJsonObject(session_id_);

GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed");
GE_CHK_STATUS_RET(RemoveDumpProperties(), "[Remove][DumpProperties] failed.");

return ret;
}
@@ -170,13 +178,17 @@ Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_);
GELOGE(GE_SESS_INIT_FAILED, "[Add][Graph] failed because GraphManager not init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
REPORT_INNER_ERROR("E19999", "AddGraph failed because GraphManager not init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(options);
Status ret = graph_manager_.AddGraph(graph_id, graph, options, domi::GetContext());
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] add graph %u failed.", session_id_, graph_id);
GELOGE(ret, "[Add][Graph] failed, InnerSession:%lu graphid: %u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999", "GraphManager AddGraph failed, InnerSession:%lu graphid: %u.", session_id_, graph_id);
return ret;
}

@@ -188,13 +200,19 @@ Status InnerSession::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_);
GELOGE(GE_SESS_INIT_FAILED, "[Add][Graph] failed because GraphManager not init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
REPORT_INNER_ERROR("E19999",
"AddGraphWithCopy failed because GraphManager not init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(options);
Status ret = graph_manager_.AddGraphWithCopy(graph_id, graph, options, domi::GetContext());
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] add graph %u failed.", session_id_, graph_id);
GELOGE(ret, "[Add][Graph] failed, InnerSession:%lu graphid: %u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager AddGraphWithCopy failed, InnerSession:%lu graphid: %u.", session_id_, graph_id);
return ret;
}

@@ -207,7 +225,10 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
if (mutex_.try_lock()) {
std::lock_guard<std::mutex> lock(mutex_, std::adopt_lock);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_);
GELOGE(GE_SESS_INIT_FAILED, "[Run][Graph]failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
REPORT_INNER_ERROR("E19999", "RunGraph failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(graph_id);
@@ -220,7 +241,9 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
domi::GetContext().out_nodes_map.clear();
domi::GetContext().user_out_nodes.clear();
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id);
GELOGE(ret, "[Run][Graph]failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager RunGraph failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
return ret;
}
outputs.clear();
@@ -231,7 +254,9 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
GELOGI("[InnerSession:%lu] run graph success, graph_id=%u.", session_id_, graph_id);
return SUCCESS;
} else {
GELOGE(GE_SESS_ALREADY_RUNNING, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id);
GELOGE(GE_SESS_ALREADY_RUNNING, "[Run][Graph]failed, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraph failed because mutex try_lock false, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
return GE_SESS_ALREADY_RUNNING;
}
}
@@ -239,13 +264,18 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
Status InnerSession::RemoveGraph(uint32_t graph_id) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_);
GELOGE(GE_SESS_INIT_FAILED,
"[Remove][Graph] failed because GraphManager not init, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_INNER_ERROR("E19999",
"RemoveGraph failed, because GraphManager not init, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(graph_id);
Status ret = graph_manager_.RemoveGraph(graph_id);
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] remove graph failed, graph_id=%u.", session_id_, graph_id);
GELOGE(ret, "[Remove][Graph] failed, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager RemoveGraph failed, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
return ret;
}

@@ -258,13 +288,18 @@ Status InnerSession::RegisterCallBackFunc(
const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_);
GELOGE(GE_SESS_INIT_FAILED,
"[Register][CallBackFunc] failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc failed because GraphManager not init, InnerSession:%lu.", session_id_);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(std::map<std::string, std::string>{});
Status ret = graph_manager_.RegisterCallBackFunc(key, callback);
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] register %s callback function failed.", session_id_, key.c_str());
GELOGE(ret, "[Register][CallBackFunc] failed, InnerSession:%lu register %s.", session_id_, key.c_str());
REPORT_CALL_ERROR("E19999",
"GraphManager RegisterCallBackFunc failed, InnerSession:%lu register %s.", session_id_, key.c_str());
return ret;
}

@@ -277,13 +312,18 @@ Status InnerSession::RegisterCallBackFunc(
const std::function<Status(uint32_t, const std::map<AscendString, ge::Tensor> &)> &callback) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_);
GELOGE(GE_SESS_INIT_FAILED,
"[Register][CallBackFunc]failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(std::map<std::string, std::string>{});
Status ret = graph_manager_.RegisterCallBackFunc(key, callback);
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] register %s callback function failed.", session_id_, key.c_str());
GELOGE(ret, "[Register][CallBackFunc] failed, InnerSession:%lu register %s.", session_id_, key.c_str());
REPORT_CALL_ERROR("E19999",
"GraphManager RegisterCallBackFunc failed, InnerSession:%lu register %s.", session_id_, key.c_str());
return ret;
}

@@ -308,7 +348,9 @@ Status InnerSession::BuildGraph(uint32_t graph_id, const std::vector<InputTensor
GeRootModelPtr ge_root_model = nullptr;
Status ret = graph_manager_.BuildGraph(graph_id, ge_inputs, ge_root_model, session_id_, true);
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] build graph failed, graph_id=%u.", session_id_, graph_id);
GELOGE(ret, "[Build][Graph] failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager BuildGraph failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
return ret;
}
GELOGI("[InnerSession:%lu] build graph success, graph_id=%u.", session_id_, graph_id);
@@ -321,7 +363,9 @@ Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector<InputTen
GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id);
Status ret = graph_manager_.RunGraphAsync(graph_id, inputs, session_id_, callback);
if (ret != SUCCESS) {
GELOGE(ret, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id);
GELOGE(ret, "[Run][GraphAsync]failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager RunGraphAsync failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
return ret;
}
GELOGI("[InnerSession:%lu] run graph success, graph_id=%u.", session_id_, graph_id);
@@ -369,7 +413,8 @@ Status InnerSession::SaveVariables(const Graph &graph, const std::vector<std::st
Status InnerSession::AddDumpProperties(const DumpProperties &dump_properties) {
if (!is_dump_server_inited_) {
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server init failed");
GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus,
GELOGE(PARAM_INVALID, "[Init][AdxDataDumpServer] failed, session_id:%lu.", session_id_);
return PARAM_INVALID)
GELOGI("Init adx data dump server success");
is_dump_server_inited_ = true;
@@ -382,7 +427,11 @@ Status InnerSession::AddDumpProperties(const DumpProperties &dump_properties) {
Status InnerSession::RemoveDumpProperties() {
DumpManager::GetInstance().RemoveDumpProperties(session_id_);
if (is_dump_server_inited_ && DumpManager::GetInstance().GetDumpPropertiesMap().empty()) {
GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server uninit failed");
GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus,
GELOGE(PARAM_INVALID, "[UnInit][AdxDataDumpServer] failed, session_id:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RemoveDumpProperties failed because AdxDataDumpServerUnInit failed, session_id:%lu.",
session_id_);
return PARAM_INVALID)
GELOGI("UnInit adx data dump server success");
is_dump_server_inited_ = false;


+ 80
- 22
ge/session/session_manager.cc View File

@@ -61,7 +61,8 @@ Status SessionManager::SetRtContext(SessionId session_id, rtContext_t rt_context

Status SessionManager::CreateSession(const std::map<std::string, std::string> &options, SessionId &session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "[Create][Session]fail for Session manager is not initialized.");
REPORT_INNER_ERROR("E19999", "CreateSession fail for Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionId next_session_id = 0;
@@ -92,7 +93,10 @@ Status SessionManager::CreateSession(const std::map<std::string, std::string> &o

Status SessionManager::DestroySession(SessionId session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "[Destroy][Session]fail for Session manager is not initialized, session_id:%lu.",
session_id);
REPORT_INNER_ERROR("E19999",
"DestroySession fail for Session manager is not initialized, session_id:%lu.", session_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
std::lock_guard<std::mutex> lock(mutex_);
@@ -119,7 +123,12 @@ Status SessionManager::DestroySession(SessionId session_id) {

Status SessionManager::GetVariable(SessionId session_id, const std::string &name, Tensor &val) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Get][Variable]fail for Session manager is not initialized, session_id:%lu, input_name:%s.",
session_id, name.c_str());
REPORT_INNER_ERROR("E19999",
"GetVariable fail for Session manager is not initialized, session_id:%lu, input_name:%s.",
session_id, name.c_str());
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -143,7 +152,10 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G
Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Add][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"AddGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -173,7 +185,12 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G
Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Add][GraphWithCopy]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"AddGraphWithCopy fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -203,7 +220,10 @@ Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id,
Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Run][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -221,7 +241,12 @@ Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const s

Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Remove][Graph]fail for Session manager is not initialized, session_id:%lu graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RemoveGraph fail for Session manager is not initialized, session_id:%lu graph_id:%u.",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -239,7 +264,10 @@ Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) {

bool SessionManager::HasSession(SessionId session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Has][Session]fail for Session manager is not initialized, session_id:%lu.", session_id);
REPORT_INNER_ERROR("E19999",
"HasSession fail for Session manager is not initialized, session_id:%lu.", session_id);
return false;
}
return session_manager_map_.find(session_id) != session_manager_map_.end();
@@ -247,7 +275,8 @@ bool SessionManager::HasSession(SessionId session_id) {

Status SessionManager::GetNextSessionId(SessionId &next_session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "[Get][NextSessionId]fail for Session manager is not initialized.");
REPORT_INNER_ERROR("E19999", "GetNextSessionId fail for Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
static SessionId session_id = 0;
@@ -260,7 +289,12 @@ Status SessionManager::RegisterCallBackFunc(
SessionId session_id, const std::string &key,
const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Register][CallBackFunc]fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -280,7 +314,12 @@ Status SessionManager::RegisterCallBackFunc(
SessionId session_id, const std::string &key,
const std::function<Status(uint32_t, const std::map<AscendString, ge::Tensor> &)> &callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Register][CallBackFunc]fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -298,7 +337,10 @@ Status SessionManager::RegisterCallBackFunc(

Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Build][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"BuildGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -317,7 +359,12 @@ Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const
Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id,
const std::vector<InputTensorInfo> &inputs, RunAsyncCallback callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[AsyncRun][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraphAsync fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -337,7 +384,10 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
std::vector<Tensor> &var_values) {
// step 0: init session manager
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Get][Variables]fail for Session manager is not initialized, session_id:%lu", session_id);
REPORT_INNER_ERROR("E19999",
"GetVariables fail for Session manager is not initialized, session_id:%lu", session_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -355,7 +405,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
std::map<std::string, GeTensorDesc> all_variables;
Status ret = innerSession->GetAllVariables(all_variables);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get all variables failed.");
GELOGE(FAILED, "[Get][AllVariables]failed.");
return FAILED;
}

@@ -363,7 +413,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
Graph graph = Graph("checkpoint");
ret = innerSession->GenCheckPointGraph(all_variables, graph);
if (ret != SUCCESS) {
GELOGE(FAILED, "Build check point graph failed.");
GELOGE(FAILED, "[GenCheck][PointGraph] failed.");
return FAILED;
}

@@ -371,7 +421,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
uint32_t graph_id = GetCurrentSecondTimestap();
ret = AddGraph(session_id, graph_id, graph);
if (ret != SUCCESS) {
GELOGE(FAILED, "Add check point graph failed.");
GELOGE(FAILED, "[Add][Graph] failed.");
return FAILED;
}

@@ -379,7 +429,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
vector<Tensor> outputs;
ret = RunGraph(session_id, graph_id, inputs, outputs);
if (ret != SUCCESS) {
GELOGE(FAILED, "Run check point graph failed.");
GELOGE(FAILED, "[Run][Graph] failed.");
return FAILED;
}

@@ -388,14 +438,14 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
GELOGD("[SessionManager] outputs size is [%zu], var values size is [%zu].", outputs.size(), var_values.size());

if (ret != SUCCESS) {
GELOGE(FAILED, "Save variables failed.");
GELOGE(FAILED, "[Save][Variables] failed.");
return FAILED;
}

// step 5: remove graph
ret = innerSession->RemoveGraph(graph_id);
if (ret != SUCCESS) {
GELOGE(FAILED, "Remove graph failed.");
GELOGE(FAILED, "[Remove][Graph] failed.");
return FAILED;
}
return ret;
@@ -403,7 +453,12 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:

bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Check][GraphNeedRebuild]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"IsGraphNeedRebuild fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return true;
}
SessionPtr innerSession = nullptr;
@@ -411,7 +466,10 @@ bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id)
std::lock_guard<std::mutex> lock(mutex_);
auto it = session_manager_map_.find(session_id);
if (it == session_manager_map_.end()) {
GELOGE(GE_SESSION_NOT_EXIST, "The session %lu does not exists", session_id);
GELOGE(GE_SESSION_NOT_EXIST, "[Find][InnerSession] fail for %lu does not exists", session_id);
REPORT_INNER_ERROR("E19999",
"IsGraphNeedRebuild fail for InnerSession is not exists, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return true;
} else {
innerSession = it->second;


+ 41
- 26
ge/single_op/single_op.cc View File

@@ -48,7 +48,7 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
TaskDescInfo tmp_task_desc_info;
uint32_t model_id;
if (op_task->GetProfilingArgs(tmp_task_desc_info, model_id) != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get profiling data of task failed");
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Get][ProfilingArgs] failed.");
return ACL_ERROR_GE_PARAM_INVALID;
}
GELOGD("ProfilingReport of op[%s] model[%s] start.",
@@ -81,8 +81,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() {
Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs) {
auto num_inputs = inputs.size();
if (num_inputs != input_sizes_.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input num mismatch. model expect %zu, but given %zu", input_addr_list_.size(),
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][Param:inputs]Input num mismatch. model expect %zu, but given %zu", input_addr_list_.size(),
inputs.size());
REPORT_INPUT_ERROR("E10401", std::vector<std::string>({"expect_num", "input_num"}),
std::vector<std::string>({std::to_string(input_addr_list_.size()), std::to_string(num_inputs)}));
return ACL_ERROR_GE_PARAM_INVALID;
}

@@ -92,16 +95,22 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::
GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%zu",
i, aligned_size, inputs[i].length, input_sizes_[i]);
if (aligned_size < input_sizes_[i]) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu,"
" but given %zu(after align)", i, input_sizes_[i], aligned_size);
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][Param:inputs]Input size mismatch. index = %zu, model expect %zu, but given %zu(after align)",
i, input_sizes_[i], aligned_size);
REPORT_INPUT_ERROR("E10402", std::vector<std::string>({"index", "expect_size", "input_size"}),
std::vector<std::string>({std::to_string(i), std::to_string(input_sizes_[i]), std::to_string(aligned_size)})
);
return ACL_ERROR_GE_PARAM_INVALID;
}
}

auto num_outputs = outputs.size();
if (num_outputs != output_sizes_.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output num mismatch. model expect %zu, but given %zu",
output_sizes_.size(), outputs.size());
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param:outputs]output num mismatch. model expect %zu, but given %zu",
output_sizes_.size(), outputs.size());
REPORT_INPUT_ERROR("E10403", std::vector<std::string>({"expect_num", "input_num"}),
std::vector<std::string>({std::to_string(output_sizes_.size()), std::to_string(outputs.size())}));
return ACL_ERROR_GE_PARAM_INVALID;
}

@@ -111,8 +120,12 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::
GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%zu",
i, aligned_size, outputs[i].length, output_sizes_[i]);
if (aligned_size < output_sizes_[i]) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu,"
"but given %zu(after align)", i, output_sizes_[i], aligned_size);
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][Param:outputs]Output size mismatch. index = %zu, model expect %zu, but given %zu(after align)",
i, output_sizes_[i], aligned_size);
REPORT_INPUT_ERROR("E10404", std::vector<std::string>({"index", "expect_size", "input_size"}),
std::vector<std::string>({std::to_string(i), std::to_string(output_sizes_[i]), std::to_string(aligned_size)})
);
return ACL_ERROR_GE_PARAM_INVALID;
}
}
@@ -168,9 +181,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
GELOGD("Memory base changed, new memory base = %p", current_mem_base);
for (auto &task : tasks_) {
auto new_address = BuildTaskUtils::GetAddresses(task->GetOpdesc(), *running_param_);
GE_CHK_STATUS_RET(task->UpdateArgTable(*running_param_),
"[%s] Failed to update arg table",
task->GetOpdesc()->GetName().c_str());
GE_CHK_STATUS_RET(task->UpdateArgTable(*running_param_), "[Update][ArgTable] failed, single op:%s.",
task->GetOpdesc()->GetName().c_str());
}
}
ret = UpdateArgs(inputs, outputs);
@@ -183,7 +195,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
if (ret != SUCCESS) {
return ret;
}
GE_CHK_STATUS_RET(task->OpenDump(stream_), "Open single op %s dump filed",task->GetOpdesc()->GetName().c_str());
GE_CHK_STATUS_RET(task->OpenDump(stream_), "[Open][Dump]failed, single op:%s.",
task->GetOpdesc()->GetName().c_str());
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(task, kShapeTypeStatic));
}

@@ -204,33 +217,35 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc,
std::vector<DataBuffer> &outputs) const {
if (inputs.size() != input_desc.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"Input number mismatches input desc number. Input num = %zu, input desc num = %zu",
inputs.size(),
input_desc.size());
"[Check][Param:inputs]Input number mismatches input desc number. Input num = %zu, input desc num = %zu",
inputs.size(), input_desc.size());
REPORT_INPUT_ERROR("E10405", std::vector<std::string>({"input_num", "input_desc_num"}),
std::vector<std::string>({std::to_string(inputs.size()), std::to_string(input_desc.size())}));
return ACL_ERROR_GE_PARAM_INVALID;
}

if (outputs.size() != output_desc.size()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"Output number mismatches output desc number. Output num = %zu, output desc num = %zu",
outputs.size(),
output_desc.size());
"[Check][Param:outputs]Output number mismatches output desc number. Output num = %zu, output desc num = %zu",
outputs.size(), output_desc.size());
REPORT_INPUT_ERROR("E10406", std::vector<std::string>({"out_num", "out_desc_num"}),
std::vector<std::string>({std::to_string(outputs.size()), std::to_string(output_desc.size())}));
return ACL_ERROR_GE_PARAM_INVALID;
}

if (input_desc.size() != num_inputs_) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"Input number mismatches. expect %zu, but given %zu",
num_inputs_,
input_desc.size());
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param:input_desc]Input number mismatches. expect %zu, but given %zu",
num_inputs_, input_desc.size());
REPORT_INPUT_ERROR("E10401", std::vector<std::string>({"expect_num", "input_num"}),
std::vector<std::string>({std::to_string(num_inputs_), std::to_string(input_desc.size())}));
return ACL_ERROR_GE_PARAM_INVALID;
}

if (output_desc.size() != num_outputs_) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"Output number mismatches. expect %zu, but given %zu",
num_outputs_,
output_desc.size());
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param:output_desc]Output number mismatches. expect %zu, but given %zu",
num_outputs_, output_desc.size());
REPORT_INPUT_ERROR("E10403", std::vector<std::string>({"expect_num", "input_num"}),
std::vector<std::string>({std::to_string(num_outputs_), std::to_string(output_desc.size())}));
return ACL_ERROR_GE_PARAM_INVALID;
}



+ 9
- 4
ge/single_op/single_op_manager.cc View File

@@ -34,7 +34,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFr
const uint64_t model_id) {
GELOGI("GetOpFromModel in. model name = %s, model id = %lu", model_name.c_str(), model_id);
if (single_op == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "single op is null");
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Param:single_op] is null.");
REPORT_INPUT_ERROR("E10412", std::vector<std::string>({"inputparam"}), std::vector<std::string>({"single_op"}));
return ACL_ERROR_GE_INTERNAL_ERROR;
}

@@ -42,7 +43,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFr
GE_CHK_STATUS_RET(GetResourceId(stream, resource_id));
StreamResource *res = GetResource(resource_id, stream);
if (res == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "GetResource failed");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Get][Resource] failed.");
REPORT_CALL_ERROR("E19999", "GetOpFromModel fail because GetResource return nullptr.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

@@ -112,7 +114,8 @@ Status SingleOpManager::GetDynamicOpFromModel(const string &model_name,
GE_CHK_STATUS_RET(GetResourceId(stream, resource_id));
StreamResource *res = GetResource(resource_id, stream);
if (res == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "GetResource failed");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Get][Resource] failed.");
REPORT_CALL_ERROR("E19999", "GetDynamicOpFromModel fail because GetResource return nullptr.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

@@ -143,7 +146,9 @@ Status SingleOpManager::GetResourceId(rtStream_t stream, uintptr_t &resource_id)
rtContext_t rt_cur_ctx = nullptr;
auto rt_err = rtCtxGetCurrent(&rt_cur_ctx);
if (rt_err != RT_ERROR_NONE) {
GELOGE(rt_err, "get current context failed, runtime result is %d", static_cast<int>(rt_err));
GELOGE(rt_err, "[Get][CurrentContext] failed, runtime result is %d", static_cast<int>(rt_err));
REPORT_CALL_ERROR("E19999",
"GetResourceId failed because rtCtxGetCurrent result is %d", static_cast<int>(rt_err));
return RT_ERROR_TO_GE_STATUS(rt_err);
}
// use current context as resource key instead


+ 53
- 19
ge/single_op/single_op_model.cc View File

@@ -102,7 +102,8 @@ Status SingleOpModel::InitModel() {

auto ret = model_helper_.LoadModel(model);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModel failed");
GELOGE(ret, "[Load][Model] failed.");
REPORT_CALL_ERROR("E19999", "InitModel fail for ModelHelper LoadModel failed.");
return ret;
}

@@ -168,7 +169,11 @@ Status SingleOpModel::ParseInputNode(const OpDescPtr &op_desc) {
vector<int64_t> offsets = op_desc->GetOutputOffset();
if (offsets.size() != kDataOutputNum) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"Data op should have only one output, but got %zu", op_desc->GetOutputOffset().size());
"[Parse][InputNode]Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
REPORT_INNER_ERROR("E19999",
"ParseInputNode fail for Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
return ACL_ERROR_GE_PARAM_INVALID;
}

@@ -206,7 +211,9 @@ Status SingleOpModel::LoadAllNodes() {
model_id_ = ge_model->GetModelId();
auto compute_graph = GraphUtils::GetComputeGraph(graph);
if (compute_graph == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] compute_graph is null", model_name_.c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][ComputeGraph] fail, model_name:%s.", model_name_.c_str());
REPORT_CALL_ERROR("E19999", "LoadAllNodes fail for GetComputeGraph return nullptr, model_name:%s.",
model_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

@@ -314,7 +321,11 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
single_op.tasks_.emplace_back(task);
} else {
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
"Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
"[Check][KernelType]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u",
context.kernel_type());
REPORT_INNER_ERROR("E19999",
"BuildTaskList fail for %u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.",
context.kernel_type());
return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID;
}
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
@@ -340,7 +351,8 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s

void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) {
if (task == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "tbe op task is nullptr");
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Parse][ArgTable] fail for input OpTask is nullptr.");
REPORT_INNER_ERROR("E19999", "ParseArgTable fail for input OpTask is nullptr.");
return;
}

@@ -367,13 +379,15 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask *
task_def.kernel_with_handle().context();
auto iter = op_list_.find(context.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Param:TaskDef]op desc not found. op index = %u", context.op_index());
REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for op desc not found. op index = %u", context.op_index());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

auto *tbe_task = new (std::nothrow) TbeOpTask();
if (tbe_task == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create tbe op task failed");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][TbeOpTask]failed.");
REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new TbeOpTask.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

@@ -393,19 +407,24 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id) {
auto iter = op_list_.find(kernel_def.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"[Check][Param:KernelExDef]op not found. op index = %u", kernel_def.op_index());
REPORT_INNER_ERROR("E19999",
"BuildKernelExTask fail for param kernel_def, because op of kernel_def not found, op index:%u.",
kernel_def.op_index());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

std::unique_ptr<AiCpuTask> aicpu_task(new (std::nothrow) AiCpuTask());
if (aicpu_task == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create aicpu_TF op task failed");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AiCpuTask] failed.");
REPORT_INNER_ERROR("E19999", "BuildKernelExTask fail for new AiCpuTask, model_name:%s.", model_name_.c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def);
auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id);
if (ret != SUCCESS) {
GELOGE(ret, "build aicpu_TF op task failed");
GELOGE(ret, "[Build][Task] failed, kernel_id:%lu.", kernel_id);
return ret;
}
depend_compute_flag = (aicpu_task->GetUnknownType() == DEPEND_COMPUTE);
@@ -418,19 +437,25 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa
const auto &context = kernel_def.context();
auto iter = op_list_.find(context.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"[Check][Param:KernelDef] op desc not found. op index = %u", context.op_index());
REPORT_INNER_ERROR("E19999",
"BuildCpuKernelTask fail for kernel_def is invalid, because op of kernel_def not found, op index:%u.",
context.op_index());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
std::unique_ptr<AiCpuCCTask> aicpucc_task(new (std::nothrow) AiCpuCCTask());
if (aicpucc_task == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create aicpu_CC op task failed");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AiCpuCCTask] failed");
REPORT_INNER_ERROR("E19999", "BuildCpuKernelTask fail for new AiCpuCCTask, model_name:%s.", model_name_.c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def);
auto ret = builder.BuildTask(*aicpucc_task, kernel_id, model_params_);
if (ret != SUCCESS) {
GELOGE(ret, "build aicpu_CC op task failed");
GELOGE(ret, "[Build][AiCpuCCTask]failed, kernel_id:%lu.", kernel_id);
REPORT_CALL_ERROR("E19999", "BuildCpuKernelTask fail for build AiCpuTask, kernel_id:%lu.", kernel_id);
return ret;
}

@@ -469,7 +494,11 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
single_op.op_task_.reset(task);
} else {
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
"Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
"[Check][Param:TaskDef]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u",
context.kernel_type());
REPORT_INNER_ERROR("E19999",
"BuildModelTaskKernel fail for got:%u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.",
context.kernel_type());
return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID;
}
return SUCCESS;
@@ -487,13 +516,17 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks.");
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks.");
REPORT_INNER_ERROR("E19999",
"BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
}
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op));
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks.");
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks.");
REPORT_INNER_ERROR("E19999",
"BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
}
GELOGD("Building AICPU_TF task");
@@ -505,7 +538,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
depend_compute_flag, dynamic_singleop_kernel_id));
if (depend_compute_flag) {
if (i >= tasks.size() - 1) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "The copy task of the fourth operator was not found.");
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found.");
REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found.");
return ACL_ERROR_GE_PARAM_INVALID;
}
++i;
@@ -541,14 +575,14 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
GE_CHECK_NOTNULL(single_op.hybrid_model_);
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model");
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed.");
int32_t device_id = 0;
GE_CHK_RT_RET(rtGetDevice(&device_id));
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
device_id,
resource.GetStream()));
GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model");
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
return SUCCESS;
}
return BuildTaskListForDynamicOp(single_op);


+ 18
- 11
ge/single_op/stream_resource.cc View File

@@ -29,14 +29,14 @@ StreamResource::~StreamResource() {
for (auto mem : memory_list_) {
if (mem != nullptr) {
auto rt_ret = rtFree(mem);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtFree failed"));
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
}
}

for (auto weight : weight_list_) {
if (weight != nullptr) {
auto rt_ret = rtFree(weight);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtFree failed"));
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
}
}
}
@@ -95,16 +95,18 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose,
uint8_t *buffer = nullptr;
auto ret = rtMalloc(reinterpret_cast<void **>(&buffer), size, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc failed, size = %zu, ret = %d", size, ret);
GELOGE(RT_FAILED, "[RtMalloc][Memory] failed, size = %zu, ret = %d", size, ret);
REPORT_INNER_ERROR("E19999", "rtMalloc failed, size = %zu, ret = %d, when %s.", size, ret, __FUNCTION__);
return nullptr;
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, purpose.c_str(), size)

ret = rtMemset(buffer, size, 0U, size);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemset failed, ret = %d", ret);
GELOGE(RT_FAILED, "[RtMemset][Memory] failed, ret = %d", ret);
REPORT_INNER_ERROR("E19999", "rtMemset failed, ret = %d, when %s.", ret, __FUNCTION__);
auto rt_ret = rtFree(buffer);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtFree failed"));
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[RtFree][Memory] failed"));
return nullptr;
}

@@ -129,7 +131,9 @@ uint8_t *StreamResource::MallocWeight(const std::string &purpose, size_t size) {
uint8_t *buffer = nullptr;
auto ret = rtMalloc(reinterpret_cast<void **>(&buffer), size, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc failed, size = %zu, ret = %d", size, ret);
GELOGE(RT_FAILED, "[RtMalloc][Memory] failed, size = %zu, ret = %d", size, ret);
REPORT_INNER_ERROR("E19999", "rtMalloc failed, size = %zu, ret = %d when %s.",
size, ret, __FUNCTION__);
return nullptr;
}

@@ -152,7 +156,8 @@ Status StreamResource::BuildDynamicOperator(const ModelData &model_data,
SingleOpModel model(model_name, model_data.model_data, model_data.model_len);
auto ret = model.Init();
if (ret != SUCCESS) {
GELOGE(ret, "Init model failed. model = %s, ret = %u", model_name.c_str(), ret);
GELOGE(ret, "[Init][SingleOpModel] failed. model = %s, ret = %u", model_name.c_str(), ret);
REPORT_CALL_ERROR("E19999", "SingleOpModel init failed, model = %s, ret = %u", model_name.c_str(), ret);
return ret;
}

@@ -161,7 +166,7 @@ Status StreamResource::BuildDynamicOperator(const ModelData &model_data,

GELOGI("To build operator: %s", model_name.c_str());
GE_CHK_STATUS_RET(model.BuildDynamicOp(*this, *new_op),
"Build op failed. op = %s, ret = %u", model_name.c_str(), ret);
"[Build][DynamicOp]failed. op = %s, ret = %u", model_name.c_str(), ret);
*single_op = new_op.get();
dynamic_op_map_[model_id] = std::move(new_op);
return SUCCESS;
@@ -179,18 +184,20 @@ Status StreamResource::BuildOperator(const ModelData &model_data, SingleOp **sin
SingleOpModel model(model_name, model_data.model_data, model_data.model_len);
auto ret = model.Init();
if (ret != SUCCESS) {
GELOGE(ret, "Init model failed. model = %s, ret = %u", model_name.c_str(), ret);
GELOGE(ret, "[Init][SingleOpModel] failed. model = %s, ret = %u", model_name.c_str(), ret);
REPORT_CALL_ERROR("E19999", "SingleOpModel init failed, model = %s, ret = %u", model_name.c_str(), ret);
return ret;
}

auto new_op = std::unique_ptr<SingleOp>(new(std::nothrow) SingleOp(this, &stream_mu_, stream_));
if (new_op == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "new SingleOp failed");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[New][SingleOp] failed.");
REPORT_INNER_ERROR("E19999", "new SingleOp failed when %s.", __FUNCTION__);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

GELOGI("To build operator: %s", model_name.c_str());
GE_CHK_STATUS_RET(model.BuildOp(*this, *new_op), "Build op failed. op = %s, ret = %u", model_name.c_str(), ret);
GE_CHK_STATUS_RET(model.BuildOp(*this, *new_op), "[Build][Op] failed. op = %s, ret = %u", model_name.c_str(), ret);

*single_op = new_op.get();
op_map_[model_id] = std::move(new_op);


+ 13
- 9
ge/single_op/task/aicpu_kernel_task_builder.cc View File

@@ -26,7 +26,8 @@ AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::Ker
Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task, const SingleOpModelParam &param) {
size_t aicpu_arg_size = kernel_def_.args_size();
if (aicpu_arg_size <= sizeof(aicpu::AicpuParamHead)) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "aicpu_arg_size is invalid, value = %zu", aicpu_arg_size);
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]aicpu_arg_size is invalid, value = %zu", aicpu_arg_size);
REPORT_INNER_ERROR("E19999", "aicpu_arg_size is invalid, value = %zu", aicpu_arg_size);
return ACL_ERROR_GE_PARAM_INVALID;
}

@@ -36,13 +37,15 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task, const SingleOpModelP
std::unique_ptr<uint8_t[]> aicpu_args;
aicpu_args.reset(new(std::nothrow) uint8_t[aicpu_arg_size]());
if (aicpu_args == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "malloc failed, size = %zu", aicpu_arg_size);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[New][Memory] failed, size = %zu", aicpu_arg_size);
REPORT_INNER_ERROR("E19999", "new Memory failed, size = %zu", aicpu_arg_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

auto err = memcpy_s(aicpu_args.get(), aicpu_arg_size, kernel_def_.args().data(), aicpu_arg_size);
if (err != EOK) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "memcpy_s args failed, size = %zu, err = %d", aicpu_arg_size, err);
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Memcpy_s][Args] failed, size = %zu, err = %d", aicpu_arg_size, err);
REPORT_INNER_ERROR("E19999", "memcpy_s aicpu_args failed, size = %zu, err = %d", aicpu_arg_size, err);
return ACL_ERROR_GE_INTERNAL_ERROR;
}

@@ -76,9 +79,9 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons
task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
bool loaded = false;
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded),
"launch cust aicpu so failed");
"[Load][CustAicpuSo] failed.");
if (!loaded) {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "[Launch][CustAicpuSo] failed.");
}
}

@@ -89,18 +92,19 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id, cons
auto &kernel_ext_info = kernel_def_.kernel_ext_info();
auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size();
GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED,
"task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
"[Check][Size]task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
kernel_ext_info.size(), kernel_ext_info_size);

ret = task.SetExtInfoAndType(kernel_ext_info, kernel_id);
if (ret != SUCCESS) {
GELOGE(ret, "Init ext info failed.");
GELOGE(ret, "[Set][ExtInfoAndType]failed, kernel_id=%lu.", kernel_id);
REPORT_CALL_ERROR("E19999", "SetExtInfoAndType failed, kernel_id=%lu.", kernel_id);
return ret;
}
GE_CHK_STATUS_RET(task.SetInputConst(), "AiCpuCCTask set input_const failed.");
GE_CHK_STATUS_RET(task.SetInputConst(), "[Set][InputConst] failed.");

if (task.GetUnknownType() == DEPEND_COMPUTE) {
GELOGE(FAILED, "AiCpuCCTask unknown type is depend compute, it's not supported now.");
GELOGE(FAILED, "[Get][UnknownType] is depend compute, it's not supported now.");
return FAILED;
}
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(task.args_.get());


+ 14
- 9
ge/single_op/task/aicpu_task_builder.cc View File

@@ -30,7 +30,8 @@ namespace ge {
auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL),
kernel_def_.args().data(), kernel_def_.args().size());
if (sec_ret != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret);
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Memcpy_s][Param:fwk_op_kernel] failed, ret: %d", sec_ret);
REPORT_INNER_ERROR("E19999", "memcpy_s fwk_op_kernel failed, ret:%d.", sec_ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}

@@ -45,7 +46,8 @@ namespace ge {
void *fwk_op_args = nullptr;
auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "malloc arg memory failed, ret = %d", rt_ret);
GELOGE(rt_ret, "[RtMalloc][Memory] failed, ret = %d", rt_ret);
REPORT_INNER_ERROR("E19999", "rtMalloc Memory failed, ret = %d", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

@@ -53,7 +55,8 @@ namespace ge {
sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
(void)rtFree(fwk_op_args);
GELOGE(rt_ret, "copy args failed, ret = %d", rt_ret);
GELOGE(rt_ret, "[rtMemcpy][Fwk_Op_Args] failed, ret = %d", rt_ret);
REPORT_INNER_ERROR("E19999", "rtMemcpy fwk_op_args failed, ret = %d", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
*args = fwk_op_args;
@@ -62,8 +65,10 @@ namespace ge {

Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam &param, bool dynamic_flag) {
if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size());
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size());
REPORT_INNER_ERROR("E19999", "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size());
return ACL_ERROR_GE_PARAM_INVALID;
}
GE_CHK_RT_RET(rtMalloc(&task.workspace_addr_, kernel_def_.task_info_size(), RT_MEMORY_HBM));
@@ -97,16 +102,16 @@ namespace ge {
auto &kernel_ext_info = kernel_def_.kernel_ext_info();
auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size();
GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, ACL_ERROR_GE_PARAM_INVALID,
"task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
"[Check][Size]task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
kernel_ext_info.size(), kernel_ext_info_size);
GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info, kernel_id), "Init ext info failed.");
GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info, kernel_id), "[Set][ExtInfoAndType]failed.");

if (task.ext_info_addr_dev_ != nullptr) {
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(task.ext_info_addr_dev_);
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = kernel_ext_info_size;
}
GE_CHK_STATUS_RET(task.SetInputConst(), "AiCpuTask set input_const failed.");
GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "AiCpuTask init for summary and copy task failed.");
GE_CHK_STATUS_RET(task.SetInputConst(), "[Set][InputConst] failed.");
GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "[Init][SummaryAndCopy] failed.");

fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = ULLONG_MAX;
fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID = kernel_id;


+ 57
- 46
ge/single_op/task/op_task.cc View File

@@ -56,9 +56,11 @@ Status OpTask::OpenDump(rtStream_t stream) {
size_t arg_num = 0;
GetIoAddr(arg_base, arg_num);
if (arg_num < input_size + output_size) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "io_addrs_for_dump_ size %zu is not equal input and output size %zu",
arg_num,
input_size + output_size);
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"[Check][Size]io_addrs_for_dump_ size %zu is not equal input and output size %zu",
arg_num, input_size + output_size);
REPORT_INNER_ERROR("E19999", "io_addrs_for_dump_ size %zu is not equal input and output size %zu",
arg_num, input_size + output_size);
return ACL_ERROR_GE_INTERNAL_ERROR;
}

@@ -74,7 +76,7 @@ Status OpTask::OpenDump(rtStream_t stream) {
op_desc_, input_addrs, output_adds, stream);
auto status = dump_op_.LaunchDumpOp();
if (status != SUCCESS) {
GELOGE(status, "Launch dump op failed in single op");
GELOGE(status, "[Launch][DumpOp] failed in single op.");
return status;
}
return SUCCESS;
@@ -116,7 +118,8 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id
uint32_t stream_id = 0;
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Get task_id and stream_id failed ret: 0x%X.", rt_ret);
GELOGE(RT_FAILED, "[Get][TaskIdAndStreamID] failed, ret: 0x%X.", rt_ret);
REPORT_CALL_ERROR("E19999", "rtGetTaskIdAndStreamID failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_CHECK_NOTNULL(op_desc_);
@@ -145,10 +148,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam &param, bool keep_works
size_t arg_num = 0;
GetIoAddr(arg_base, arg_num);
if (arg_num < all_addresses.size()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu",
op_desc_->GetName().c_str(),
all_addresses.size(),
arg_num);
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"[Check][Size][%s] arg number mismatches, expect at least = %zu, but got = %zu.",
op_desc_->GetName().c_str(), all_addresses.size(), arg_num);
REPORT_INNER_ERROR("E19999", "%s arg number mismatches, expect at least = %zu, but got = %zu.",
op_desc_->GetName().c_str(), all_addresses.size(), arg_num);
return ACL_ERROR_GE_INTERNAL_ERROR;
}

@@ -207,7 +211,8 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
}

if (ret != RT_ERROR_NONE) {
GELOGE(ret, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->stub_name_.c_str());
GELOGE(ret, "[Invoke][RtKernelLaunch] failed. ret = %d, task = %s", ret, this->stub_name_.c_str());
REPORT_INNER_ERROR("E19999", "invoke rtKernelLaunch failed, ret = %d, task = %s", ret, this->stub_name_.c_str());
return RT_ERROR_TO_GE_STATUS(ret);
}
GELOGI("[TASK_INFO] %s", this->stub_name_.c_str());
@@ -223,7 +228,8 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
run_info.block_dim = 0;
auto ret = optiling::OpParaCalculate(*node_, run_info);
if (ret != GRAPH_SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to invoke OpParaCalculate. ret = %u", ret);
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpParaCalculate] failed, ret = %u.", ret);
REPORT_INNER_ERROR("E19999", "invoke OpParaCalculate failed, ret = %u.", ret);
return ACL_ERROR_GE_INTERNAL_ERROR;
}
block_dim_ = run_info.block_dim;
@@ -232,7 +238,7 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
tiling_data_.size(), tiling_key_);

GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces");
GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "[Allocate][Workspaces] failed.");
return SUCCESS;
}

@@ -248,7 +254,7 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc
} else {
std::vector<int64_t> storage_shape;
if (!AttrUtils::GetListInt(src_tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to get storage_shape while storage_format was set");
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][ListInt]failed while storage_format was set.");
return ACL_ERROR_GE_INTERNAL_ERROR;
}

@@ -309,7 +315,8 @@ Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) {
GE_CHECK_NOTNULL(stream_resource_);
auto ws_base = stream_resource_->MallocMemory(kPurpose, static_cast<size_t>(total_size));
if (ws_base == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to allocate memory of size: %ld", total_size);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Malloc][Memory] failed, size: %ld", total_size);
REPORT_INNER_ERROR("E19999", "MallocMemory failed, size: %ld", total_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
GELOGD("Done allocating workspace memory successfully.");
@@ -348,8 +355,8 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
}

if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to update kernel args.",
node_->GetName().c_str());
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str());
REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str());
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}

@@ -398,17 +405,19 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint
num_outputs_,
unknown_type_));
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION,
"Malloc aicpu_ext_handle mem failed!");
"[Malloc][Memory] failed for aicpu_ext_handle!");

Status ret = aicpu_ext_handle_->Parse(kernel_ext_info);
if (ret != SUCCESS) {
GELOGE(ret, "Parse kernel ext info failed, kernel_ext_info_size=%zu.", kernel_ext_info.size());
GELOGE(ret, "[Parse][Param:kernel_ext_info] failed, kernel_ext_info_size=%zu.", kernel_ext_info.size());
REPORT_INNER_ERROR("E19999",
"Parse Param:kernel_ext_info failed, kernel_ext_info_size=%zu.", kernel_ext_info.size());
return ret;
}

GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false),
"UpdateSessionInfo failed.");
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateExecuteMode(true), "UpdateExecuteMode failed.");
"[Update][SessionInfo] failed.");
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateExecuteMode(true), "[Update][ExecuteMode] failed.");

GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM));
GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(),
@@ -441,7 +450,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
rtStream_t stream) {
GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_);
GE_CHECK_NOTNULL(aicpu_ext_handle_);
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateExecuteMode(false), "UpdateExecuteMode failed.");
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateExecuteMode(false), "[Update][ExecuteMode] failed.");

if (num_inputs_ == 0 && num_outputs_ == 0) {
GELOGI("No input and output, no need update ext info.");
@@ -455,21 +464,20 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
auto const_input_desc = op_desc_->MutableInputDesc(static_cast<uint32_t>(input_index));
GE_CHECK_NOTNULL(const_input_desc);
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, *const_input_desc),
"Input[%zu] update input shape failed.", input_index);
"[Update][InputShapeAndType] failed, input_index:%zu.", input_index);
continue;
}
GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID,
"Input_desc size is %zu, but get non_const_index is %zu",
input_desc.size(), non_const_index);
"[Check][Size]Input_desc size is %zu, but get non_const_index is %zu", input_desc.size(), non_const_index);
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]),
"Input[%zu] update input shape failed.", input_index);
"[Update][InputShapeAndType]failed, input_index:%zu.", input_index);
non_const_index++;
}

if (unknown_type_ != DEPEND_COMPUTE) {
for (size_t j = 0; j < num_outputs_; ++j) {
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]),
"Output[%zu] UpdateOutputShapeAndType failed.", j);
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]),
"[Update][OutputShapeAndType] failed, Output:%zu.", j);
}
}

@@ -498,11 +506,10 @@ Status AiCpuBaseTask::UpdateOutputShape(vector<GeTensorDesc> &output_desc) {
GeShape shape;
DataType data_type;
aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type);
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.",
i);
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]),
"[Update][ShapeToOutputDesc] failed, output:%zu.", i);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuCCTask Update [%zu]th output desc failed.",
i);
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "[Update][OutputDesc] failed, output:%zu.", i);
}
}
GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished.");
@@ -527,7 +534,7 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor
auto trans_ret = formats::TransShape(format, shape_new.GetDims(),
output_desc.GetDataType(), origin_format, origin_dims_new);
GE_CHK_STATUS_RET(trans_ret,
"AiCpuTask originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.",
"[Trans][Shape] failed, AiCpuTask originFormat[%d] is not same as format[%d], shape=%s.",
origin_format, format, shape_new.ToString().c_str());

auto origin_shape_new = GeShape(origin_dims_new);
@@ -553,8 +560,7 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vecto
continue;
}
GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), ACL_ERROR_GE_PARAM_INVALID,
"Input size is %zu, but get non_const_index is %zu",
inputs.size(), non_const_index);
"[Check][Size] Input size is %zu, but get non_const_index is %zu", inputs.size(), non_const_index);
auto addr = inputs[non_const_index].data;
GE_CHECK_NOTNULL(addr);
GELOGD("AICpuTask input[%zu] addr = %p", input_index, addr);
@@ -602,14 +608,16 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) {
RT_MEMCPY_HOST_TO_DEVICE_EX,
stream);
if (ret != RT_ERROR_NONE) {
GELOGE(ret, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str());
GELOGE(ret, "[MemcpyAsync][Date] failed. ret = %d, task = %s", ret, this->op_type_.c_str());
REPORT_CALL_ERROR("E19999", "rtMemcpyAsync data failed, ret = %d, task = %s", ret, this->op_type_.c_str());
return RT_ERROR_TO_GE_STATUS(ret);
}

GELOGI("To invoke rtKernelLaunchEx. task = %s", this->op_type_.c_str());
ret = rtKernelLaunchEx(args_, arg_size_, 0, stream);
if (ret != RT_ERROR_NONE) {
GELOGE(ret, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str());
GELOGE(ret, "[Invoke][rtKernelLaunch] failed. ret = %d, task = %s", ret, this->op_type_.c_str());
REPORT_CALL_ERROR("E19999", "invoke rtKernelLaunchEx failed, ret = %d, task = %s", ret, this->op_type_.c_str());
return RT_ERROR_TO_GE_STATUS(ret);
}
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str());
@@ -706,10 +714,9 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) {
}

GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]),
"AiCpuTask update [%zu]th output shape failed.", i);
"[Update][ShapeToOutputDesc] failed , output:%zu.", i);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "AiCpuTask update [%zu]th output desc failed.",
i);
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(i, output_desc[i]), "[Update][OutputDesc] failed, output:%zu.", i);
}
}
return SUCCESS;
@@ -731,13 +738,13 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output
}
out_shape_hbm_.clear();
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(),
"Read ResultSummary and update output shape failed.");
"[Read][ResultSummaryAndPrepareMemory] failed.");

GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream),
"Copy data to output failed.");
"[Copy][DataToHbm] failed.");

GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc),
"Update shape by hbm buffer failed.");
"[Update][ShapeByHbmBuffer] failed.");

for (auto out_shape : out_shape_hbm_) {
FreeHbm(out_shape);
@@ -787,8 +794,10 @@ Status AiCpuTask::InitForSummaryAndCopy() {

Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) {
if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
REPORT_INNER_ERROR("E19999", "[sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
return ACL_ERROR_GE_PARAM_INVALID;
}
GE_CHK_RT_RET(rtMalloc(&copy_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM));
@@ -799,7 +808,8 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) {
auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL),
kernel_def.args().data(), kernel_def.args().size());
if (sec_ret != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret);
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][TaskArgs] failed, ret: %d", sec_ret);
REPORT_INNER_ERROR("E19999", "update STR_FWK_OP_KERNEL args failed because memcpy_s return %d.", sec_ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}

@@ -883,7 +893,8 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) {
block_dim_, args_.get(), static_cast<uint32_t>(arg_size_),
sm_desc, stream, dump_flag_);
if (ret != RT_ERROR_NONE) {
GELOGE(ret, "Invoke rtCpuKernelLaunch failed. ret = %d", ret);
GELOGE(ret, "[Invoke][rtCpuKernelLaunchWithFlag] failed. ret = %d.", ret);
REPORT_CALL_ERROR("E19999", "invoke rtCpuKernelLaunchWithFlag failed, ret:%d.", ret);
return RT_ERROR_TO_GE_STATUS(ret);
}
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str());


+ 52
- 24
ge/single_op/task/tbe_task_builder.cc View File

@@ -112,8 +112,10 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi
ret = rtDevBinaryRegister(&binary, bin_handle);
}
if (ret != RT_ERROR_NONE) {
GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
param.core_type, static_cast<int>(ret));
GELOGE(ret, "[DoRegister][Binary] failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
param.core_type, static_cast<int>(ret));
REPORT_CALL_ERROR("E19999", "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d",
stub_name_.c_str(), param.core_type, static_cast<int>(ret));
return ret;
}

@@ -127,8 +129,10 @@ Status TbeTaskBuilder::DoRegisterMeta(void *bin_handle) {
if (!meta_data.empty()) {
auto rt_ret = rtMetadataRegister(bin_handle, meta_data.c_str());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMetadataRegister failed. bin key = %s, meta_data = %s, rt ret = %d", stub_name_.c_str(),
meta_data.c_str(), static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Invoke][rtMetadataRegister] failed. bin key = %s, meta_data = %s, rt ret = %d",
stub_name_.c_str(), meta_data.c_str(), static_cast<int>(rt_ret));
REPORT_CALL_ERROR("E19999", "rtMetadataRegister failed, bin key = %s, meta_data = %s, rt ret = %d",
stub_name_.c_str(), meta_data.c_str(), static_cast<int>(rt_ret));
return rt_ret;
}
}
@@ -139,8 +143,10 @@ Status TbeTaskBuilder::DoRegisterMeta(void *bin_handle) {
Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_name, const char *kernel_name) {
auto rt_ret = rtFunctionRegister(bin_handle, stub_name, stub_name, kernel_name, FUNC_MODE_NORMAL);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtFunctionRegister failed. bin key = %s, kernel name = %s, rt ret = %d", stub_name, kernel_name,
static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Invoke][rtFunctionRegister] failed. bin key = %s, kernel name = %s, rt ret = %d",
stub_name, kernel_name, static_cast<int>(rt_ret));
REPORT_CALL_ERROR("E19999", "rtFunctionRegister failed. bin key = %s, kernel name = %s, rt ret = %d",
stub_name, kernel_name, static_cast<int>(rt_ret));
return rt_ret;
}

@@ -197,27 +203,32 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam

auto tbe_kernel = GetTbeKernel(op_desc_);
if (tbe_kernel == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][TbeKernel] fail for OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
REPORT_CALL_ERROR("E19999", "GetTbeKernel fail for OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

auto holder = std::unique_ptr<KernelHolder>(new (std::nothrow) KernelHolder(stub_func, tbe_kernel));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create KernelHodler failed.");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][KernelHodler] failed.");
REPORT_INNER_ERROR("E19999", "Create KernelHodler failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}

void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Register][Kernel] failed. stub name = %s", stub_name_.c_str());
REPORT_CALL_ERROR("E19999", "DoRegisterKernel failed, stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
holder->SetBinHandle(bin_handle);
if (!registry.AddKernel(stub_name_, std::move(holder))) {
// should not happen. only one thread can reach here
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Add][Kernel] failed. stub name = %s", stub_name_.c_str());
REPORT_CALL_ERROR("E19999", "AddKernel failed. stub name = %s", stub_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
}
@@ -231,24 +242,29 @@ Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpM
HandleRegistry &registry = HandleRegistry::GetInstance();
auto tbe_kernel = GetTbeKernel(op_desc_);
if (tbe_kernel == nullptr) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][TbeKernel] fail for OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
REPORT_CALL_ERROR("E19999", "GetTbeKernel fail for OP EXT ATTR NAME TBE_KERNEL not found. op = %s",
op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
void *bin_handle = nullptr;
auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Register][Kernel] failed. node name = %s", op_desc_->GetName().c_str());
REPORT_CALL_ERROR("E19999", "DoRegisterKernel failed, node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
handle_ = bin_handle;
auto holder = std::unique_ptr<HandleHolder>(new (std::nothrow) HandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][HandleHolder] failed.");
REPORT_INNER_ERROR("E19999", "Create HandleHolder failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Add][Handle] failed. node name = %s", op_desc_->GetName().c_str());
REPORT_CALL_ERROR("E19999", "AddHandle failed, node name = %s", op_desc_->GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}

@@ -274,14 +290,16 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param

auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Invoke][rtMemAllocManaged] failed, ret: %d.", static_cast<int>(rt_ret));
REPORT_CALL_ERROR("E19999", "rtMemAllocManaged failed, ret: %d.", static_cast<int>(rt_ret));
return rt_ret;
}

rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
(void)rtMemFreeManaged(*sm_desc);
GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Update][Param:sm_desc] fail for rtMemcpy return: %d.", static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret:%d.", static_cast<int>(rt_ret));
return rt_ret;
}
}
@@ -296,7 +314,9 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &

auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy failed, size = %zu, ret = %d",
arg_size, static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

@@ -315,7 +335,8 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
@@ -332,7 +353,9 @@ Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpMo

auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Update][Kernel_def:args]rtMemcpy failed, size = %zu, ret = %d",
arg_size, static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return rt_ret;
}

@@ -351,7 +374,8 @@ Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpMo
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret));
return rt_ret;
}
}
@@ -384,7 +408,8 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam &para
void *stub_func = nullptr;
auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func);
if (rt_ret != SUCCESS) {
GELOGE(rt_ret, "rtGetFunctionByName failed.");
GELOGE(rt_ret, "[Get][FunctionByName] failed. stub_name:%s.", stub_name_.c_str());
REPORT_CALL_ERROR("E19999", "rtGetFunctionByName failed, stub_name:%s.", stub_name_.c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
task.SetStubFunc(stub_name_, stub_func);
@@ -399,7 +424,10 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) {
(void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size);
GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size);
if (max_size < 0) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size);
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Get][Int] %s Invalid op_param_size: %ld.",
op_desc_->GetName().c_str(), max_size);
REPORT_CALL_ERROR("E19999", "AttrUtils::GetInt failed, %s Invalid op_param_size: %ld.",
op_desc_->GetName().c_str(), max_size);
return ACL_ERROR_GE_PARAM_INVALID;
}
void *tiling_buffer = nullptr;


Loading…
Cancel
Save