From: @nicholas_yhr
Reviewed-by: @lilongfei15, @ljl0711
Signed-off-by: @lilongfei15
Tag: tags/v1.2.0
@@ -221,7 +221,10 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_
   try {
     json_file_ << jsn.dump(kJsonDumpLevel) << std::endl;
   } catch (nlohmann::detail::type_error &e) {
-    GELOGE(FAILED, "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s], session_id:%lu, graph_id:%lu", json_file_name_.c_str(), e.what(), session_id, graph_id);
+    GELOGE(FAILED,
+           "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s], "
+           "session_id:%lu, graph_id:%lu",
+           json_file_name_.c_str(), e.what(), session_id, graph_id);
     ret_failed = true;
   }
   json_file_.close();
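All of the GELOGE splits in this patch rely on one C++ rule: adjacent string literals concatenate into a single literal at compile time, so a long printf-style format string can be wrapped across lines without changing the logged message. A minimal, self-contained sketch (plain printf in place of GELOGE; the values are made up) of both the pattern and its one pitfall, a missing space at the join:

```cpp
#include <cstdio>

int main() {
  // Adjacent literals concatenate at compile time: the two fragments below
  // form one format string, identical to writing it on a single line.
  std::printf("json.dump to analyze file [%s] failed because [%s], "
              "session_id:%lu, graph_id:%lu\n",
              "analyze.json", "type_error", 1UL, 2UL);

  // Pitfall: forgetting the space at the split point silently glues the
  // words together ("...[bad],session_id:...") -- the code still compiles.
  std::printf("failed because [%s],"
              "session_id:%lu\n", "bad", 1UL);
  return 0;
}
```

This is why the first hunk above also gains a space inside the literal before the split point: without it the two fragments would fuse into `[%s],session_id`.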
@@ -241,7 +244,9 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) {
   GE_CHECK_NOTNULL(graph_info);
   auto status = SaveOpInfo(desc, data_info, graph_info);
   if (status != SUCCESS) {
-    GELOGE(status, "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!", desc->GetName().c_str(), desc->GetType().c_str());
+    GELOGE(status,
+           "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!",
+           desc->GetName().c_str(), desc->GetType().c_str());
     return FAILED;
   }
   // create json file
@@ -154,7 +154,8 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
   }
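The allocation sites touched below all use the same idiom: an array `new` with `std::nothrow` owned by a `std::shared_ptr` whose deleter is `std::default_delete<uint8_t[]>`, so failure surfaces as a nullptr check rather than an exception, and release correctly uses `delete[]`. A minimal sketch of the idiom outside GE (the function name and sizes are illustrative):

```cpp
#include <cstdint>
#include <cstdio>
#include <memory>

// Allocate a byte buffer without exceptions; returns nullptr on failure.
std::shared_ptr<uint8_t> AllocBuffer(int64_t total_size) {
  // new (std::nothrow) returns nullptr instead of throwing std::bad_alloc;
  // default_delete<uint8_t[]> makes shared_ptr call delete[] on release.
  std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size](),
                               std::default_delete<uint8_t[]>());
  if (dst == nullptr) {
    std::fprintf(stderr, "can not alloc the memory for dst buf %ld\n",
                 static_cast<long>(total_size));
  }
  return dst;
}

int main() {
  auto buf = AllocBuffer(1024);
  return buf == nullptr ? 1 : 0;
}
```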
@@ -73,7 +73,8 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -94,7 +94,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -122,7 +123,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
                        args.data + src_idx * data_size, static_cast<size_t>(data_size));
       }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
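The copy loops in these hunks check `memcpy_s` against `EOK`: unlike `memcpy`, it receives the destination's remaining capacity as a separate argument and reports failure through a return code instead of overrunning the buffer. `memcpy_s` here comes from the bounded-copy (securec-style) interface GE builds against; since it is an optional C11 Annex K function, the sketch below supplies a local stand-in so it runs on any toolchain:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Minimal stand-in for securec's memcpy_s: refuses to write more than the
// destination can hold and reports failure through a return code.
constexpr int EOK_LOCAL = 0;
int memcpy_s_local(void *dest, size_t dest_max, const void *src, size_t count) {
  if (dest == nullptr || src == nullptr || count > dest_max) {
    return -1;  // the real securec returns a nonzero errno-style code here
  }
  std::memcpy(dest, src, count);
  return EOK_LOCAL;
}

int main() {
  uint8_t dst[8] = {0};
  const uint8_t src[16] = {1, 2, 3, 4};
  // protected_size mirrors the diff: bytes left in dst from this offset on.
  size_t protected_size = sizeof(dst);
  int ret = memcpy_s_local(dst, protected_size, src, sizeof(src));
  if (ret != EOK_LOCAL) {
    std::fprintf(stderr, "Failed to operate the dst memory, error-code %d\n", ret);
    return 1;
  }
  return 0;
}
```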
@@ -95,7 +95,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -123,7 +124,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
                        args.data + src_idx * data_size, static_cast<size_t>(data_size));
       }
       if (ret != EOK) {
-        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+        GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+               "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                dst_offset, ret, pad_zero);
         return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
       }
@@ -139,7 +139,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -175,7 +176,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
         auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size * w0));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -189,7 +191,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
         auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -210,7 +213,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -246,7 +250,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
         ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                        static_cast<size_t>(size * w0));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
          return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -260,7 +265,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
         ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                        static_cast<size_t>(size));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -274,14 +280,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -325,7 +333,8 @@ Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -333,7 +342,8 @@ Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -127,7 +127,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
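GE_CHK_BOOL_TRUE_EXEC_WITH_LOG and GE_IF_BOOL_EXEC accept an entire statement list as one macro argument, which is why these call sites end in the odd-looking `return ...;);`: the semicolon closes the last statement, the parenthesis closes the macro. A stripped-down sketch of how such a macro behaves (a hypothetical macro, not GE's actual definition):

```cpp
#include <cstdio>

// Hypothetical equivalent of GE's check-and-exec macros: if COND holds,
// run the statement list EXEC. Semicolons inside EXEC do not split macro
// arguments, so several statements can ride in one argument slot.
#define CHK_TRUE_EXEC(COND, EXEC) \
  do {                            \
    if (COND) {                   \
      EXEC;                       \
    }                             \
  } while (0)

int Alloc(bool fail) {
  static char buffer[16];
  void *dst = fail ? nullptr : buffer;
  CHK_TRUE_EXEC(dst == nullptr,
                std::fprintf(stderr, "can not alloc the memory for dst buf\n");
                return -1);  // log + early return, both inside one argument
  return 0;
}

int main() { return Alloc(true) == -1 ? 0 : 1; }
```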
@@ -173,8 +174,9 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
       }
     }
     if (ret != EOK) {
-      GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d", offset,
-             ret, need_pad_zero);
+      GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+             "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d",
+             offset, ret, need_pad_zero);
       return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
     }
   }
@@ -213,7 +215,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -235,7 +238,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
                          static_cast<size_t>(data_size));
         } else {
           if (protected_size < data_size) {
-            GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+            GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+                   "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                    protected_size, data_size);
             return ACL_ERROR_GE_PARAM_INVALID;
           }
@@ -247,7 +251,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
           }
         }
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                  dst_offset, ret, pad_zero);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
@@ -288,7 +293,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
       dst == nullptr,
-      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+             "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
              TypeUtils::FormatToSerialString(args.src_format).c_str(),
             TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
       return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -310,7 +316,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
                          static_cast<size_t>(data_size));
         } else {
           if (protected_size < data_size) {
-            GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+            GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+                   "Failed to operate the dst memory, protected_size is %ld and size is %ld",
                    protected_size, data_size);
             return ACL_ERROR_GE_PARAM_INVALID;
           }
@@ -322,7 +329,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
           }
         }
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
                  dst_offset, ret, pad_zero);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
@@ -140,7 +140,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -179,7 +180,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
         auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size * w0));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -195,7 +197,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
         auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -217,7 +220,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -257,7 +261,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
        auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size * w0));
        if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -273,7 +278,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
         auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
                             static_cast<size_t>(size));
         if (ret != EOK) {
-          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+          GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+                 "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
           return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
         }
       }
@@ -288,14 +294,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
 Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
     return ACL_ERROR_GE_DATATYPE_INVALID;
   }
   if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -339,7 +347,8 @@ Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector
 Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult &result) {
   if (!IsDataTypeSupport(args.src_data_type)) {
-    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -347,7 +356,8 @@ Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult
   }
   if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
            ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -66,7 +66,7 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) {
                    FmtToStr(ShapeToString(dst_shape));
     GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str());
     return ACL_ERROR_GE_SHAPE_INVALID;
-    }
+  }
   return SUCCESS;
 }
@@ -74,7 +74,8 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -59,9 +59,10 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) {
   }
   int64_t c1 = Ceil(dst_shape.at(kNchwC), c0);
   int64_t n0 = Ceil(dst_shape.at(kNchwN), static_cast<int64_t>(kNiSize));
-  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || src_shape.at(kFracZC0) != c0 ||
-      src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 ||
+      src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
            ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
     return ACL_ERROR_GE_SHAPE_INVALID;
   }
@@ -72,7 +73,8 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -59,9 +59,10 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) {
   }
   int64_t c1 = Ceil(dst_shape.at(kNhwcC), c0);
   int64_t n0 = Ceil(dst_shape.at(kNhwcN), static_cast<int64_t>(kNiSize));
-  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || src_shape.at(kFracZC0) != c0 ||
-      src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
-    GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+  if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 ||
+      src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
+    GELOGE(PARAM_INVALID,
+           "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
           ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
     return PARAM_INVALID;
   }
@@ -72,7 +73,8 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(OUT_OF_MEMORY,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return OUT_OF_MEMORY;
@@ -140,7 +142,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult &
   }
   GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size,
-            ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str());
+         ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str());
   return PARAM_INVALID;
 }
 GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld",
@@ -91,7 +91,8 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -72,7 +72,8 @@ Status CheckArgsForNc1hwc0ToNchw(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(OUT_OF_MEMORY,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return OUT_OF_MEMORY;
@@ -61,7 +61,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) {
   if (src_shape.at(kNc1hwc0H) != dst_shape.at(kNhwcH) || src_shape.at(kNc1hwc0W) != dst_shape.at(kNhwcW) ||
       src_shape.at(kNc1hwc0N) != dst_shape.at(kNhwcN) || src_shape.at(kNc1hwc0C0) != c0 ||
       src_shape.at(kNc1hwc0C1) != (Ceil(dst_shape.at(kNhwcC), c0))) {
-    GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+    GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+           "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
            ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
     return ACL_ERROR_GE_SHAPE_INVALID;
   }
@@ -72,7 +73,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -125,7 +125,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) {
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   auto t1 = h_o * w_o;
   auto t2 = n_o * c_o;
-  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
+  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2),
+                  GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
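CheckInt64MulOverflow guards the element-count products before they become allocation sizes. Its implementation is not part of this diff; a common way to write such a guard, sketched here under the assumption of nonnegative operands (which holds for the shape dimensions being multiplied), divides the int64 maximum by one factor before multiplying:

```cpp
#include <cstdint>
#include <cstdio>
#include <limits>

// Sketch of an int64 multiplication overflow guard (not GE's exact code):
// returns true when a * b fits in int64_t. Nonnegative operands assumed.
bool CheckInt64MulOverflowSketch(int64_t a, int64_t b) {
  if (a == 0 || b == 0) {
    return true;
  }
  return a <= std::numeric_limits<int64_t>::max() / b;
}

int main() {
  int64_t h_o = 1 << 20, w_o = 1 << 20, n_o = 1 << 20, c_o = 16;
  int64_t t1 = h_o * w_o;  // 2^40, still representable
  int64_t t2 = n_o * c_o;  // 2^24; t1 * t2 = 2^64 would overflow
  if (!CheckInt64MulOverflowSketch(t1, t2)) {
    std::fprintf(stderr, "int64 mul overflow. A[%ld], B[%ld]\n",
                 static_cast<long>(t1), static_cast<long>(t2));
    return 1;
  }
  return 0;
}
```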
@@ -140,7 +141,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -212,7 +214,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   auto t1 = h_o * w_o;
   auto t2 = n_o * c_o;
-  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
+  GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2),
+                  GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
                   return ACL_ERROR_GE_INTERNAL_ERROR);
   int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
@@ -228,7 +231,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin
   dst.reset(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -275,7 +279,8 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult
   }
   std::vector<int64_t> expect_shape;
-  ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, args_tmp.dst_format, expect_shape);
+  ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type,
+                   args_tmp.dst_format, expect_shape);
   if (ret != SUCCESS) {
     return ret;
   }
@@ -92,7 +92,8 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) {
 Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
   std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
   if (dst == nullptr) {
-    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+           "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
            TypeUtils::FormatToSerialString(args.src_format).c_str(),
            TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
     return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -592,8 +592,8 @@ Status GeGenerator::SetModelNameForDump(const GeRootModelPtr &ge_root_model) {
     ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
     GELOGE(FAILED, "[Check][GetModelNameStep]Get model_name failed. Param --output is invalid, root graph name: %s",
            ge_root_model->GetRootGraph()->GetName().c_str());
-    REPORT_CALL_ERROR("E19999", "Get model_name failed. Param --output is invalid,",
-                      "root graph name: %s", ge_root_model->GetRootGraph()->GetName().c_str());
+    REPORT_CALL_ERROR("E19999", "Get model_name failed. Param --output is invalid, root graph name: %s",
+                      ge_root_model->GetRootGraph()->GetName().c_str());
    return PARAM_INVALID;
   }
   map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
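The REPORT_CALL_ERROR change above fixes a real bug, not just formatting: the stray comma ended the format string at `invalid,` and demoted `"root graph name: %s"` to the first variadic argument, so the graph name silently never appeared in the report (printf-family functions evaluate and ignore surplus arguments). A plain printf sketch of the same mistake:

```cpp
#include <cstdio>

int main() {
  const char *graph_name = "root_graph";
  // Wrong: the comma ends the format string, so "root graph name: %s" is
  // consumed as an (ignored) argument and never printed, and graph_name
  // never reaches any conversion specifier.
  std::printf("Get model_name failed. Param --output is invalid,",  // bug
              "root graph name: %s", graph_name);
  std::printf("\n");
  // Right: adjacent literals (no comma) concatenate into one format string.
  std::printf("Get model_name failed. Param --output is invalid, "
              "root graph name: %s\n", graph_name);
  return 0;
}
```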
@@ -597,11 +597,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
     int64_t size = 0;
     GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
     GE_IF_BOOL_EXEC(size < 0,
-                    GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s",
-                           size, node_op_desc->GetName().c_str());
-                    REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s",
-                                       size, node_op_desc->GetName().c_str());
-                    return;);
+                    GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, "
+                           "maybe it is unknown shape node, Node_name:%s",
+                           size, node_op_desc->GetName().c_str());
+                    REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, "
+                                       "maybe it is unknown shape node, Node_name:%s",
+                                       size, node_op_desc->GetName().c_str());
+                    return;);
     batch_all_memory_size[batch_label].emplace_back(size);
     if (batch_total_size.find(batch_label) == batch_total_size.end()) {
       batch_total_size[batch_label] = size;
@@ -692,23 +694,23 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
   auto out_anchor = n->GetOutDataAnchor(out_index);
   GE_IF_BOOL_EXEC(out_anchor == nullptr,
                   GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] anchor is null.",
-                      n->GetName().c_str(), out_index);
+                         n->GetName().c_str(), out_index);
                   REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.",
-                      n->GetName().c_str(), out_index);
+                                     n->GetName().c_str(), out_index);
                   return false;);
   for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
     GE_IF_BOOL_EXEC(peer_in_anchor == nullptr,
                     GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] peer_in_anchor 0 is null.",
-                        n->GetName().c_str(), out_index);
+                           n->GetName().c_str(), out_index);
                     REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.",
-                        n->GetName().c_str(), out_index);
+                                       n->GetName().c_str(), out_index);
                     return false;);
     auto peer_node = peer_in_anchor->GetOwnerNode();
     GE_IF_BOOL_EXEC(peer_node == nullptr,
                     GELOGE(FAILED, "[Check][Node]Node[%s] output[%u] peer node is null.",
-                        n->GetName().c_str(), out_index);
+                           n->GetName().c_str(), out_index);
                     REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.",
-                        n->GetName().c_str(), out_index);
+                                       n->GetName().c_str(), out_index);
                     return false;);
     // Get the continuous input type of the node, default is false
| @@ -716,9 +718,9 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
| auto peer_in_node_desc = peer_node->GetOpDesc(); | auto peer_in_node_desc = peer_node->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | ||||
| GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.", | GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.", | ||||
| n->GetName().c_str(), out_index); | |||||
| n->GetName().c_str(), out_index); | |||||
| REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.", | REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.", | ||||
| n->GetName().c_str(), out_index); | |||||
| n->GetName().c_str(), out_index); | |||||
| return false;); | return false;); | ||||
| // If GetBool fail, is_input_continuous is false. | // If GetBool fail, is_input_continuous is false. | ||||
| @@ -819,7 +821,7 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr & | |||||
| (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | ||||
| (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | ||||
| GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.", | GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.", | ||||
| n->GetName().c_str(), out_index); | |||||
| n->GetName().c_str(), out_index); | |||||
| REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.", | REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.", | ||||
| n->GetName().c_str(), out_index); | n->GetName().c_str(), out_index); | ||||
| return false; | return false; | ||||
| @@ -1105,9 +1107,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | ||||
| const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | ||||
| const bool continuous, int64_t memory_type) { | const bool continuous, int64_t memory_type) { | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); | |||||
| return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null."); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| n == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); | |||||
| return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null."); | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); | ||||
| std::string batch_label; | std::string batch_label; | ||||
| @@ -1159,10 +1162,12 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
| } | } | ||||
| auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); | auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u", | |||||
| n->GetName().c_str(), out_index); | |||||
| return nullptr, "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| block == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u", | |||||
| n->GetName().c_str(), out_index); | |||||
| return nullptr, | |||||
| "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index); | |||||
| // Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
| block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
| @@ -1221,13 +1226,15 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu | |||||
| Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | ||||
| const bool is_op_reuse_mem) { | const bool is_op_reuse_mem) { | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null"); | |||||
| return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null."); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| n == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null"); | |||||
| return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null."); | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||||
| return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| node_op_desc == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||||
| return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||||
| // continuous output support ref only when all output ref input | // continuous output support ref only when all output ref input | ||||
| bool isAllOutputRef = true; | bool isAllOutputRef = true; | ||||
| @@ -1242,7 +1249,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| if (!isAllOutputRef && isOutputHasRef) { | if (!isAllOutputRef && isOutputHasRef) { | ||||
| REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s", | REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s", | ||||
| n->GetName().c_str()); | |||||
| n->GetName().c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s", | GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s", | ||||
| n->GetName().c_str()); | n->GetName().c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| @@ -1255,7 +1262,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
| if (output_op_desc == nullptr) { | if (output_op_desc == nullptr) { | ||||
| REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", | REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", | ||||
| n->GetName().c_str(), index); | |||||
| n->GetName().c_str(), index); | |||||
| GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -1268,7 +1275,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| int64_t size = 0; | int64_t size = 0; | ||||
| if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | ||||
| REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u", | REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u", | ||||
| n->GetName().c_str(), index); | |||||
| n->GetName().c_str(), index); | |||||
| GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index); | GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -1310,7 +1317,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| ++(block->ref_count_); | ++(block->ref_count_); | ||||
| } else { | } else { | ||||
| REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld", | REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld", | ||||
| n->GetName().c_str(), total_size); | |||||
| n->GetName().c_str(), total_size); | |||||
| GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size); | GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -1319,26 +1326,33 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
| MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | ||||
| const bool is_op_reuse_mem, const bool continuous) { | const bool is_op_reuse_mem, const bool continuous) { | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null"); | |||||
| return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null"); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| n == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null"); | |||||
| return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null"); | |||||
| auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||||
| return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| node_op_desc == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||||
| return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||||
| MemoryBlock *block = nullptr; | MemoryBlock *block = nullptr; | ||||
| NodeIndexIO node_index_io(n, index, kOut); | NodeIndexIO node_index_io(n, index, kOut); | ||||
| int64_t size = 0; | int64_t size = 0; | ||||
| auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
| GE_IF_BOOL_EXEC(output_op_desc == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| return nullptr); | |||||
| GE_IF_BOOL_EXEC( | |||||
| output_op_desc == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", | |||||
| n->GetName().c_str(), index); | |||||
| GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| return nullptr); | |||||
| GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | ||||
| size_t no_align_size = 0; | size_t no_align_size = 0; | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | |||||
| REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| return nullptr, "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | |||||
| REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", | |||||
| n->GetName().c_str(), index); | |||||
| return nullptr, | |||||
| "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
| std::string symbol; | std::string symbol; | ||||
| bool reuse_input = false; | bool reuse_input = false; | ||||
| @@ -1346,9 +1360,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
| GE_IF_BOOL_EXEC(block == nullptr, | GE_IF_BOOL_EXEC(block == nullptr, | ||||
| REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s", | REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s", | ||||
| node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||||
| node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||||
| GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s", | GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s", | ||||
| node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||||
| node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||||
| return nullptr); | return nullptr); | ||||
| // reduce old size | // reduce old size | ||||
| size_t align_size = block->Size(); | size_t align_size = block->Size(); | ||||
| @@ -1392,24 +1406,28 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
| vector<bool> workspace_reuse_flag; | vector<bool> workspace_reuse_flag; | ||||
| block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, | block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, | ||||
| workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | ||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, | |||||
| REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u", | |||||
| n->GetName().c_str(), block_size, index); | |||||
| return nullptr, "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u", | |||||
| GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||||
| block == nullptr, | |||||
| REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u", | |||||
| n->GetName().c_str(), block_size, index); | |||||
| return nullptr, | |||||
| "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u", | |||||
| n->GetName().c_str(), block_size, index); | n->GetName().c_str(), block_size, index); | ||||
| } | } | ||||
| int out_count = 0; | int out_count = 0; | ||||
| GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), | |||||
| REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s", | |||||
| index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||||
| GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s", | |||||
| index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||||
| return nullptr); | |||||
| GE_IF_BOOL_EXEC( | |||||
| index >= n->GetAllOutDataAnchors().size(), | |||||
| REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s", | |||||
| index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||||
| GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s", | |||||
| index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||||
| return nullptr); | |||||
| auto out_data_anchor = n->GetOutDataAnchor(index); | auto out_data_anchor = n->GetOutDataAnchor(index); | ||||
| GE_IF_BOOL_EXEC(out_data_anchor == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||||
| GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||||
| return nullptr); | |||||
| GE_IF_BOOL_EXEC( | |||||
| out_data_anchor == nullptr, | |||||
| REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||||
| GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||||
| return nullptr); | |||||
| for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | ||||
| auto owner_node = in_anchor->GetOwnerNode(); | auto owner_node = in_anchor->GetOwnerNode(); | ||||
| auto op_desc = owner_node->GetOpDesc(); | auto op_desc = owner_node->GetOpDesc(); | ||||
| @@ -1616,12 +1634,13 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
| op_desc->GetOutputsSize(), memorys_type.size()); | op_desc->GetOutputsSize(), memorys_type.size()); | ||||
| if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | ||||
| REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s", | REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s", | ||||
| ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||||
| op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||||
| GELOGE(INTERNAL_ERROR, | |||||
| "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s", | |||||
| ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||||
| op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||||
| ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||||
| op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||||
| GELOGE( | |||||
| INTERNAL_ERROR, | |||||
| "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s", | |||||
| ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||||
| op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -1748,9 +1767,11 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
| if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | ||||
| REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s", | REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s", | ||||
| TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str()); | |||||
| TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), | |||||
| temp.size(), n->GetName().c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s", | GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s", | ||||
| TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str()); | |||||
| TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), | |||||
| temp.size(), n->GetName().c_str()); | |||||
| return; | return; | ||||
| } | } | ||||
| for (size_t i = 0; i < temp.size(); i++) { | for (size_t i = 0; i < temp.size(); i++) { | ||||
| @@ -2160,10 +2181,11 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, | |||||
| ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); | ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); | ||||
| if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) { | if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) { | ||||
| REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, " | REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, " | ||||
| "index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | |||||
| index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); | |||||
| "index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | |||||
| index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), | |||||
| workspace_memory_type.size(), node->GetName().c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | ||||
| index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); | |||||
| index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); | |||||
| return false; | return false; | ||||
| } | } | ||||
| memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; | memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; | ||||
| @@ -496,7 +496,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
| REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, " | REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, " | ||||
| "when assign continuous input memory for node:%s, ", memory_type, node->GetName().c_str()); | "when assign continuous input memory for node:%s, ", memory_type, node->GetName().c_str()); | ||||
| GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s", | GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s", | ||||
| memory_type, node->GetName().c_str()); | |||||
| memory_type, node->GetName().c_str()); | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| // The head and tail of hcom continuous input should be added 512 | // The head and tail of hcom continuous input should be added 512 | ||||
| @@ -929,8 +929,8 @@ Status GraphMemoryAssigner::AssignReferenceMemory() { | |||||
| if (out_op_desc->GetOutputsSize() > output_list.size()) { | if (out_op_desc->GetOutputsSize() > output_list.size()) { | ||||
| REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s " | REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s " | ||||
| "when AssignReferenceMemory", | |||||
| out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||||
| "when AssignReferenceMemory", | |||||
| out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||||
| GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s", | GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s", | ||||
| out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | ||||
| return ge::FAILED; | return ge::FAILED; | ||||
| @@ -2875,23 +2875,16 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
| GELOGI("DavinciModel::UpdateKnownNodeArgs in"); | GELOGI("DavinciModel::UpdateKnownNodeArgs in"); | ||||
| GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs), | GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs), | ||||
| "DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy."); | "DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy."); | ||||
| if (!base_addr_not_changed_) { | |||||
| total_io_addrs_.clear(); | |||||
| orig_total_io_addrs_.clear(); | |||||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | |||||
| auto &task = task_list_[task_index]; | |||||
| if (task != nullptr) { | |||||
| Status ret = task->UpdateArgs(); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index); | |||||
| return FAILED; | |||||
| } | |||||
| total_io_addrs_.clear(); | |||||
| for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | |||||
| auto &task = task_list_[task_index]; | |||||
| if (task != nullptr) { | |||||
| Status ret = task->UpdateArgs(); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index); | |||||
| return FAILED; | |||||
| } | } | ||||
| } | } | ||||
| // cache latest iterator io addr | |||||
| orig_total_io_addrs_ = total_io_addrs_; | |||||
| } else { | |||||
| total_io_addrs_ = orig_total_io_addrs_; | |||||
| } | } | ||||
| GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | ||||
| @@ -2931,6 +2924,14 @@ Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status DavinciModel::CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const { | |||||
| int64_t value = RT_CAPABILITY_SUPPORT; | |||||
| auto rt_ret = rtGetRtCapability(featureType, featureInfo, &value); | |||||
| GE_CHK_BOOL_RET_STATUS(rt_ret == RT_ERROR_NONE, FAILED, "call rtGetRtCapability failed!"); | |||||
| is_support = (value == RT_CAPABILITY_SUPPORT); | |||||
| return SUCCESS; | |||||
| } | |||||
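The new CheckCapability helper turns the raw rtGetRtCapability query into a Status plus an out-parameter bool. A self-contained sketch of the same control flow, with the runtime types and rtGetRtCapability stubbed out for illustration:

#include <cstdint>
#include <iostream>

using rtError_t = int32_t;
constexpr rtError_t RT_ERROR_NONE = 0;
constexpr int64_t RT_CAPABILITY_SUPPORT = 1;
enum rtFeatureType_t { FEATURE_TYPE_MEMORY = 0 };
constexpr int32_t MEMORY_INFO_TS_4G_LIMITED = 0;

// Stub standing in for the real runtime query.
rtError_t rtGetRtCapability(rtFeatureType_t, int32_t, int64_t *value) {
  *value = RT_CAPABILITY_SUPPORT;  // pretend the device supports the feature
  return RT_ERROR_NONE;
}

bool CheckCapability(rtFeatureType_t type, int32_t info, bool &is_support) {
  int64_t value = RT_CAPABILITY_SUPPORT;
  if (rtGetRtCapability(type, info, &value) != RT_ERROR_NONE) {
    return false;  // maps to the FAILED status in the original
  }
  is_support = (value == RT_CAPABILITY_SUPPORT);
  return true;
}

int main() {
  bool is_support = false;
  if (CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, is_support)) {
    std::cout << "TS 4G limited memory supported: " << std::boolalpha << is_support << "\n";
  }
  return 0;
}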
| Status DavinciModel::MallocKnownArgs() { | Status DavinciModel::MallocKnownArgs() { | ||||
| GELOGI("DavinciModel::MallocKnownArgs in"); | GELOGI("DavinciModel::MallocKnownArgs in"); | ||||
| const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | ||||
| @@ -2949,20 +2950,22 @@ Status DavinciModel::MallocKnownArgs() { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| rtError_t rt_ret; | |||||
| // malloc args memory | // malloc args memory | ||||
| if (total_args_size_ == 0) { | |||||
| GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero."); | |||||
| return SUCCESS; | |||||
| } | |||||
| bool is_support = false; | |||||
| GE_CHK_STATUS_RET_NOLOG(CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, is_support)); | |||||
| auto mem_type = is_support ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | |||||
| rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| if (total_args_size_ != 0) { | |||||
| rt_ret = rtMalloc(&args_, total_args_size_, mem_type); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| } | } | ||||
| // malloc dynamic and static hybrid memory | // malloc dynamic and static hybrid memory | ||||
| if (total_hybrid_args_size_ != 0) { | if (total_hybrid_args_size_ != 0) { | ||||
| rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); | |||||
| rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, mem_type); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| @@ -2971,7 +2974,7 @@ Status DavinciModel::MallocKnownArgs() { | |||||
| // malloc fixed addr memory, eg: rts op | // malloc fixed addr memory, eg: rts op | ||||
| if (total_fixed_addr_size_ != 0) { | if (total_fixed_addr_size_ != 0) { | ||||
| GELOGI("Begin to allocate fixed addr."); | GELOGI("Begin to allocate fixed addr."); | ||||
| rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, RT_MEMORY_HBM); | |||||
| rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, mem_type); | |||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
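The reworked MallocKnownArgs resolves the memory type once, from the TS-4G capability probe, and reuses it for the args, hybrid, and fixed-addr buffers; rt_ret is likewise declared once and shared by all three rtMalloc calls. A compilable model of that flow, with rtMalloc mocked by malloc and all names hypothetical:

#include <cstdio>
#include <cstdlib>

enum MemType { RT_MEMORY_HBM, RT_MEMORY_TS_4G };

int rtMallocMock(void **ptr, size_t size, MemType /*type*/) {
  *ptr = std::malloc(size);
  return *ptr != nullptr ? 0 : -1;
}

int main() {
  bool ts_4g_supported = true;  // would come from the capability check
  MemType mem_type = ts_4g_supported ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;

  size_t total_args_size = 64, total_hybrid_args_size = 0, total_fixed_addr_size = 32;
  void *args = nullptr, *hybrid_addrs = nullptr, *fixed_addrs = nullptr;
  int rt_ret = 0;  // declared once, reused by every allocation, as in the diff

  if (total_args_size != 0 && (rt_ret = rtMallocMock(&args, total_args_size, mem_type)) != 0) return rt_ret;
  if (total_hybrid_args_size != 0 && (rt_ret = rtMallocMock(&hybrid_addrs, total_hybrid_args_size, mem_type)) != 0) return rt_ret;
  if (total_fixed_addr_size != 0 && (rt_ret = rtMallocMock(&fixed_addrs, total_fixed_addr_size, mem_type)) != 0) return rt_ret;

  std::printf("allocated args=%p hybrid=%p fixed=%p\n", args, hybrid_addrs, fixed_addrs);
  std::free(args); std::free(hybrid_addrs); std::free(fixed_addrs);
  return 0;
}

Note that the old early-return on total_args_size_ == 0 becomes a plain conditional, so the hybrid and fixed-addr allocations still run when the args region is empty.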
| @@ -530,11 +530,11 @@ class DavinciModel { | |||||
| } | } | ||||
| void SetKnownNode(bool known_node) { known_node_ = known_node; } | void SetKnownNode(bool known_node) { known_node_ = known_node; } | ||||
| bool IsKnownNode() { return known_node_; } | bool IsKnownNode() { return known_node_; } | ||||
| Status CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const; | |||||
| Status MallocKnownArgs(); | Status MallocKnownArgs(); | ||||
| Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
| Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true); | Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true); | ||||
| void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | |||||
| Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const; | Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const; | ||||
| Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims, | Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims, | ||||
| @@ -1007,8 +1007,6 @@ class DavinciModel { | |||||
| map<const void *, void *> known_input_data_info_; | map<const void *, void *> known_input_data_info_; | ||||
| map<const void *, void *> known_output_data_info_; | map<const void *, void *> known_output_data_info_; | ||||
| vector<void *> total_io_addrs_; | vector<void *> total_io_addrs_; | ||||
| vector<void *> orig_total_io_addrs_; | |||||
| bool base_addr_not_changed_ = false; | |||||
| vector<vector<int64_t>> batch_info_; | vector<vector<int64_t>> batch_info_; | ||||
| vector<vector<int64_t>> combined_batch_info_; | vector<vector<int64_t>> combined_batch_info_; | ||||
| @@ -384,7 +384,8 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc | |||||
| switch (mem_type) { | switch (mem_type) { | ||||
| case RT_MEMORY_RDMA_HBM: | case RT_MEMORY_RDMA_HBM: | ||||
| if (offset < 0) { | if (offset < 0) { | ||||
| GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset)); | |||||
| GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", | |||||
| reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset))); | |||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)); | var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)); | ||||
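The fix above routes the signed offset through uintptr_t before forming a pointer, instead of reinterpret_cast-ing the int64_t directly; the two-step cast makes the integer-to-pointer conversion explicit and matches the assignment on the following line. In isolation (OffsetToAddr is a hypothetical name for illustration):

#include <cstdint>

uint8_t *OffsetToAddr(int64_t offset) {
  // The caller has already rejected negative offsets, so the
  // signed-to-unsigned step cannot wrap a valid address.
  return reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));
}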
| @@ -124,7 +124,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| ret = InitTVMTask(args_offset_tmp[0], kernel_def); | |||||
| io_addr_offset_ = args_offset_tmp[0]; | |||||
| ret = InitTVMTask(io_addr_offset_, kernel_def); | |||||
| } else if (kernel_type_ == ccKernelType::CUSTOMIZED) { | } else if (kernel_type_ == ccKernelType::CUSTOMIZED) { | ||||
| ret = InitAICPUCustomTask(context.op_index(), kernel_def); | ret = InitAICPUCustomTask(context.op_index(), kernel_def); | ||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | ||||
| @@ -380,7 +381,8 @@ Status KernelTaskInfo::Distribute() { | |||||
| GELOGD("KernelTaskInfo Distribute Start."); | GELOGD("KernelTaskInfo Distribute Start."); | ||||
| if (davinci_model_->IsKnownNode()) { | if (davinci_model_->IsKnownNode()) { | ||||
| if (kernel_type_ == ccKernelType::TE) { | if (kernel_type_ == ccKernelType::TE) { | ||||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) | |||||
| : davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | ||||
| args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); | args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); | ||||
| } | } | ||||
| @@ -449,29 +451,41 @@ void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) { | |||||
| } | } | ||||
| } | } | ||||
| Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) { | |||||
| GE_CHECK_NOTNULL(davinci_model_); | |||||
| // copy new io addrs | |||||
| vector<void *> io_addrs = io_addrs_; | |||||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||||
| auto addr_size = kAddrLen * io_addrs.size(); | |||||
| // copy io addr | |||||
| errno_t sec_ret = memcpy_s(args_addr.get() + offset, addr_size, io_addrs.data(), addr_size); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| // copy args to device | |||||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| GELOGD("Copy noncontinuous args success, kernel type %d.", kernel_type_); | |||||
| return SUCCESS; | |||||
| } | |||||
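CopyNoncontinuousArgs factors out a pattern now shared by the TE-with-L2-buffer and AICPU paths: refresh the I/O addresses, patch them into a host-side shadow of the args buffer at a type-specific offset, then push the whole buffer to the device in one rtMemcpy. A standalone sketch of that staging pattern, with the device copy mocked by memcpy and an explicit bounds check added for illustration:

#include <cstdint>
#include <cstring>
#include <vector>

constexpr size_t kAddrLen = sizeof(void *);

bool CopyNoncontinuousArgs(uint8_t *host_args, size_t args_size,
                           const std::vector<void *> &io_addrs, uint16_t offset,
                           uint8_t *device_args) {
  const size_t addr_size = kAddrLen * io_addrs.size();
  if (offset + addr_size > args_size) {
    return false;  // would overrun the staged buffer
  }
  std::memcpy(host_args + offset, io_addrs.data(), addr_size);  // patch host shadow
  std::memcpy(device_args, host_args, args_size);               // stands in for rtMemcpy H2D
  return true;
}

The only per-path difference is the offset: io_addr_offset_ for TE kernels, sizeof(aicpu::AicpuParamHead) for AICPU kernels, as the UpdateArgs rewrite below shows.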
| Status KernelTaskInfo::UpdateArgs() { | Status KernelTaskInfo::UpdateArgs() { | ||||
| GELOGI("KernelTaskInfo::UpdateArgs in."); | GELOGI("KernelTaskInfo::UpdateArgs in."); | ||||
| GE_CHECK_NOTNULL(davinci_model_); | |||||
| if (kernel_type_ == ccKernelType::TE) { | if (kernel_type_ == ccKernelType::TE) { | ||||
| if (l2_buffer_on_) { | |||||
| return CopyNoncontinuousArgs(io_addr_offset_); | |||||
| } | |||||
| davinci_model_->SetTotalIOAddrs(io_addrs_); | davinci_model_->SetTotalIOAddrs(io_addrs_); | ||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | ||||
| vector<void *> io_addrs = io_addrs_; | |||||
| davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||||
| uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||||
| auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||||
| errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| // copy args to device | |||||
| rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead)); | |||||
| } | } | ||||
| GELOGI("KernelTaskInfo::UpdateArgs success."); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -516,8 +530,8 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| char *sm_contrl = const_cast<char *>(sm_desc.data()); | |||||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_contrl); | |||||
| char *sm_control = const_cast<char *>(sm_desc.data()); | |||||
| rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_control); | |||||
| uint64_t gen_base_addr = davinci_model_->GetRtBaseAddr(); | uint64_t gen_base_addr = davinci_model_->GetRtBaseAddr(); | ||||
| // There is no weight for te op now. Update L2_mirror_addr by data memory base. | // There is no weight for te op now. Update L2_mirror_addr by data memory base. | ||||
| @@ -545,19 +559,31 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| void KernelTaskInfo::SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
| davinci_model->SetTotalArgsSize(args_size); | |||||
| } | |||||
| void KernelTaskInfo::SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model) { | |||||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||||
| davinci_model->SetHybridArgsSize(args_size); | |||||
| } | |||||
| Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
| GE_CHECK_NOTNULL(davinci_model); | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | const domi::KernelDef &kernel_def = task_def.kernel(); | ||||
| const domi::KernelContext &context = kernel_def.context(); | const domi::KernelContext &context = kernel_def.context(); | ||||
| kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | ||||
| uint32_t args_size = kernel_def.args_size(); | |||||
| if (kernel_type_ == ccKernelType::TE) { | if (kernel_type_ == ccKernelType::TE) { | ||||
| uint32_t args_size = kernel_def.args_size(); | |||||
| args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
| davinci_model->SetTotalArgsSize(args_size); | |||||
| GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||||
| if (kernel_def.sm_desc().empty()) { | |||||
| SetContinuousArgs(args_size, davinci_model); | |||||
| return SUCCESS; | |||||
| } | |||||
| l2_buffer_on_ = true; | |||||
| SetNoncontinuousArgs(args_size, davinci_model); | |||||
| } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | ||||
| hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||||
| davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||||
| GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||||
| SetNoncontinuousArgs(args_size, davinci_model); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
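CalculateArgs now dispatches on kernel type and sm_desc: a TE kernel with an empty sm_desc reserves space in the continuous args region, while AICPU kernels, and TE kernels that set l2_buffer_on_, reserve space in the hybrid region. Both helpers are a simple bump-allocator style reservation, assuming SetTotalArgsSize/SetHybridArgsSize accumulate onto the running totals as the offset bookkeeping implies. In miniature, with ModelArgsPlan and TaskArgsSlot as hypothetical stand-ins:

#include <cstdint>
#include <cstdio>

struct ModelArgsPlan {
  uint32_t total_args_size = 0;         // continuous (TE, no L2 buffer) region
  uint32_t total_hybrid_args_size = 0;  // noncontinuous (AICPU / TE with L2) region
};

struct TaskArgsSlot {
  uint32_t args_offset = 0;
  uint32_t hybrid_args_offset = 0;

  void SetContinuousArgs(uint32_t args_size, ModelArgsPlan &model) {
    args_offset = model.total_args_size;  // this task's slot starts at the current total
    model.total_args_size += args_size;   // reserve it
  }
  void SetNoncontinuousArgs(uint32_t args_size, ModelArgsPlan &model) {
    hybrid_args_offset = model.total_hybrid_args_size;
    model.total_hybrid_args_size += args_size;
  }
};

int main() {
  ModelArgsPlan model;
  TaskArgsSlot te_task, aicpu_task;
  te_task.SetContinuousArgs(128, model);       // TE kernel with empty sm_desc
  aicpu_task.SetNoncontinuousArgs(64, model);  // AICPU, or TE with l2_buffer_on_
  std::printf("te offset=%u, aicpu hybrid offset=%u, totals=%u/%u\n",
              te_task.args_offset, aicpu_task.hybrid_args_offset,
              model.total_args_size, model.total_hybrid_args_size);
  return 0;
}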
| @@ -568,8 +594,23 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| // get tvm op desc | // get tvm op desc | ||||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); | OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||||
| errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| Status ge_ret = UpdateL2Data(kernel_def); | |||||
| // update origin l2 data | |||||
| if (ge_ret != SUCCESS) { | |||||
| return ge_ret; | |||||
| } | |||||
| if (davinci_model_->IsKnownNode()) { | if (davinci_model_->IsKnownNode()) { | ||||
| args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) | |||||
| : davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
| InitDumpTask(offset); | InitDumpTask(offset); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -609,12 +650,6 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| vector<uint8_t> args_info(args_size_); | |||||
| errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); | |||||
| if (sec_ret != EOK) { | |||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
| return FAILED; | |||||
| } | |||||
| if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) { | if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) { | ||||
| GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory."); | GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory."); | ||||
| @@ -628,7 +663,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
| } | } | ||||
| sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||||
| sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||||
| kAddrLen * tensor_device_addrs.size()); | kAddrLen * tensor_device_addrs.size()); | ||||
| if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | ||||
| @@ -640,19 +675,13 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
| GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset, | GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset, | ||||
| "Op debug is open in TVM task info"); | "Op debug is open in TVM task info"); | ||||
| Status ge_ret = UpdateL2Data(kernel_def); | |||||
| // update origin l2 data | |||||
| if (ge_ret != SUCCESS) { | |||||
| return ge_ret; | |||||
| } | |||||
| vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | ||||
| virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | ||||
| virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | ||||
| if (op_desc->GetType() == ATOMICADDRCLEAN) { | if (op_desc->GetType() == ATOMICADDRCLEAN) { | ||||
| virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | ||||
| } | } | ||||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); | |||||
| davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_, offset); | |||||
| GELOGD("Do InitTVMTask end"); | GELOGD("Do InitTVMTask end"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -129,6 +129,9 @@ class KernelTaskInfo : public TaskInfo { | |||||
| bool IsL1FusionOp(const OpDescPtr &op_desc); | bool IsL1FusionOp(const OpDescPtr &op_desc); | ||||
| void SetIoAddrs(const OpDescPtr &op_desc); | void SetIoAddrs(const OpDescPtr &op_desc); | ||||
| void InitDumpTask(uint32_t offset); | void InitDumpTask(uint32_t offset); | ||||
| void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||||
| void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model); | |||||
| Status CopyNoncontinuousArgs(uint16_t offset); | |||||
| // For super kernel | // For super kernel | ||||
| Status SaveSKTDumpInfo(); | Status SaveSKTDumpInfo(); | ||||
| @@ -163,6 +166,8 @@ class KernelTaskInfo : public TaskInfo { | |||||
| uint32_t hybrid_args_offset_ = 0; | uint32_t hybrid_args_offset_ = 0; | ||||
| int64_t fixed_addr_offset_ = 0; | int64_t fixed_addr_offset_ = 0; | ||||
| std::unique_ptr<uint8_t[]> args_addr = nullptr; | std::unique_ptr<uint8_t[]> args_addr = nullptr; | ||||
| uint16_t io_addr_offset_ = 0; | |||||
| bool l2_buffer_on_ = false; | |||||
| bool call_save_dump_ = false; | bool call_save_dump_ = false; | ||||
| // aicpu ext_info device mem | // aicpu ext_info device mem | ||||
| @@ -30,8 +30,15 @@ constexpr int kMaxRePassTimes = 10000; | |||||
| constexpr size_t kMaxOneInNodes = 1000; | constexpr size_t kMaxOneInNodes = 1000; | ||||
| // Each iteration, we take about 0.3k memory on the stack, we should change the recursion to loop later | // Each iteration, we take about 0.3k memory on the stack, we should change the recursion to loop later | ||||
| constexpr int kMaxRecursiveDepth = 20; | constexpr int kMaxRecursiveDepth = 20; | ||||
| struct DuringPassNodeSets { | |||||
| std::unordered_set<Node *> nodes_seen; | |||||
| std::unordered_set<NodePtr> nodes_deleted; | |||||
| std::unordered_set<NodePtr> nodes_re_pass; | |||||
| std::unordered_set<NodePtr> nodes_re_pass_immediately; | |||||
| std::unordered_set<NodePtr> nodes_last; | |||||
| }; | |||||
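DuringPassNodeSets is a parameter-object refactor: the node sets previously threaded through every helper as separate arguments now travel as one struct, which is what lets RunPasses shrink to a single state parameter below. The shape of the change, with Node and NodePtr reduced to stand-ins:

#include <memory>
#include <string>
#include <unordered_set>

struct Node { std::string name; };
using NodePtr = std::shared_ptr<Node>;

struct DuringPassNodeSets {
  std::unordered_set<Node *> nodes_seen;
  std::unordered_set<NodePtr> nodes_deleted;
  std::unordered_set<NodePtr> nodes_re_pass;
  std::unordered_set<NodePtr> nodes_re_pass_immediately;
  std::unordered_set<NodePtr> nodes_last;
};

// Before: Status RunPasses(NodePtr &, const NamesToPass &, set &, set &, set &);
// After : Status RunPasses(NodePtr &, const NamesToPass &, DuringPassNodeSets &);
void RunPasses(const NodePtr &node, DuringPassNodeSets &state) {
  state.nodes_seen.insert(node.get());  // all pass-wide state lives in one place
}

Adding the new nodes_re_pass_immediately set then costs one struct field instead of another parameter at every call site.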
| void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &input_edge_nodes, | |||||
| void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque<NodePtr> &input_edge_nodes, | |||||
| std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) { | std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) { | ||||
| nodes_last.clear(); | nodes_last.clear(); | ||||
| for (auto &node : graph->GetDirectNode()) { | for (auto &node : graph->GetDirectNode()) { | ||||
| @@ -40,7 +47,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i | |||||
| } | } | ||||
| size_t in_nums = node->GetInNodes().size(); | size_t in_nums = node->GetInNodes().size(); | ||||
| if (in_nums == 0) { | if (in_nums == 0) { | ||||
| input_edge_nodes.push(node); | |||||
| input_edge_nodes.push_back(node); | |||||
| nodes_seen.insert(node.get()); | nodes_seen.insert(node.get()); | ||||
| } else if (in_nums > kMaxOneInNodes) { | } else if (in_nums > kMaxOneInNodes) { | ||||
| nodes_last.insert(node); | nodes_last.insert(node); | ||||
| @@ -48,7 +55,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i | |||||
| } | } | ||||
| } | } | ||||
| void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &nodes_to_pass, | |||||
| void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::deque<NodePtr> &nodes_to_pass, | |||||
| std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) { | std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) { | ||||
| for (auto &node : nodes) { | for (auto &node : nodes) { | ||||
| if (node == nullptr) { | if (node == nullptr) { | ||||
| @@ -60,13 +67,30 @@ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &n | |||||
| bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen); | bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen); | ||||
| if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) { | if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) { | ||||
| nodes_to_pass.push(node); | |||||
| nodes_to_pass.push_back(node); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unordered_set<NodePtr> &nodes_re_pass, | |||||
| std::unordered_set<NodePtr> &nodes_deleted, std::unordered_set<Node *> &nodes_seen) { | |||||
| void PushToRePassIfSeen(NodePtr &node, const std::pair<std::string, BaseNodePass *> &name_to_pass, | |||||
| std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_to_re_pass, | |||||
| std::unordered_set<NodePtr> &nodes_re_pass) { | |||||
| for (const auto &node_to_re_pass : nodes_to_re_pass) { | |||||
| if (node_to_re_pass == nullptr) { | |||||
| GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(), | |||||
| node->GetName().c_str(), node->GetType().c_str()); | |||||
| continue; | |||||
| } | |||||
| if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { | |||||
| GELOGD("The node %s will be re-pass.", node_to_re_pass->GetName().c_str()); | |||||
| nodes_re_pass.insert(node_to_re_pass); | |||||
| } else { | |||||
| GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| } | |||||
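PushToRePassIfSeen deduplicates the re-pass gating that used to be inlined in RunPasses and is now invoked twice, once for deferred and once for immediate re-pass candidates. The rule: a candidate is queued only if it has already been seen, or all of its input nodes have been, since otherwise the normal traversal will reach it anyway. The condition in condensed, compilable form (MaybeQueueRePass is a hypothetical name):

#include <memory>
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> in_nodes;
  bool IsAllInNodesSeen(const std::unordered_set<Node *> &seen) const {
    for (auto *in : in_nodes) {
      if (seen.count(in) == 0) return false;
    }
    return true;
  }
};
using NodePtr = std::shared_ptr<Node>;

void MaybeQueueRePass(const NodePtr &candidate,
                      const std::unordered_set<Node *> &nodes_seen,
                      std::unordered_set<NodePtr> &nodes_re_pass) {
  if (candidate == nullptr) return;  // mirrors the null-node warning branch
  if (nodes_seen.count(candidate.get()) > 0 || candidate->IsAllInNodesSeen(nodes_seen)) {
    nodes_re_pass.insert(candidate);
  }
}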
| Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, DuringPassNodeSets &during_pass_node_set) { | |||||
| if (node == nullptr) { | if (node == nullptr) { | ||||
| GELOGE(FAILED, "parameter is null."); | GELOGE(FAILED, "parameter is null."); | ||||
| return FAILED; | return FAILED; | ||||
| @@ -90,22 +114,15 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder | |||||
| } | } | ||||
| auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass(); | auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass(); | ||||
| for (const auto &node_to_re_pass : nodes_to_re_pass) { | |||||
| if (node_to_re_pass == nullptr) { | |||||
| GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(), | |||||
| node->GetName().c_str(), node->GetType().c_str()); | |||||
| continue; | |||||
| } | |||||
| if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { | |||||
| GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str()); | |||||
| nodes_re_pass.insert(node_to_re_pass); | |||||
| } else { | |||||
| GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str()); | |||||
| } | |||||
| } | |||||
| PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass, | |||||
| during_pass_node_set.nodes_re_pass); | |||||
| auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately(); | |||||
| PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately, | |||||
| during_pass_node_set.nodes_re_pass_immediately); | |||||
| auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); | auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); | ||||
| nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end()); | |||||
| during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end()); | |||||
| if (nodes_deleted_by_pass.count(node) > 0) { | if (nodes_deleted_by_pass.count(node) > 0) { | ||||
| GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(), | GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(), | ||||
| name_to_pass.first.c_str()); | name_to_pass.first.c_str()); | ||||
| @@ -181,36 +198,33 @@ Status GEPass::Run(const NamesToPass &names_to_passes) { | |||||
| Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { | Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { | ||||
| GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size()); | GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size()); | ||||
| std::queue<NodePtr> nodes; | |||||
| std::unordered_set<Node *> nodes_seen; | |||||
| std::unordered_set<NodePtr> nodes_deleted; | |||||
| std::unordered_set<NodePtr> nodes_re_pass; | |||||
| std::unordered_set<NodePtr> nodes_last; | |||||
| GetAllNodesNoInputEdge(graph_, nodes, nodes_seen, nodes_last); | |||||
| std::deque<NodePtr> nodes; | |||||
| DuringPassNodeSets during_pass_node_set; | |||||
| GetAllNodesNoInputEdge(graph_, nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last); | |||||
| GELOGD("Start points count %zu", nodes.size()); | GELOGD("Start points count %zu", nodes.size()); | ||||
| int re_pass_times = 0; | int re_pass_times = 0; | ||||
| do { | do { | ||||
| for (auto &node : nodes_re_pass) { | |||||
| nodes.push(node); | |||||
| nodes_seen.insert(node.get()); | |||||
| for (auto &node : during_pass_node_set.nodes_re_pass) { | |||||
| nodes.push_back(node); | |||||
| during_pass_node_set.nodes_seen.insert(node.get()); | |||||
| } | } | ||||
| nodes_re_pass.clear(); | |||||
| during_pass_node_set.nodes_re_pass.clear(); | |||||
| while (!nodes.empty()) { | while (!nodes.empty()) { | ||||
| NodePtr node = nodes.front(); | NodePtr node = nodes.front(); | ||||
| nodes.pop(); | |||||
| nodes.pop_front(); | |||||
| (void)nodes_re_pass.erase(node); | |||||
| (void)during_pass_node_set.nodes_re_pass.erase(node); | |||||
| GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue); | GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue); | ||||
| if (nodes_deleted.count(node) > 0) { | |||||
| if (during_pass_node_set.nodes_deleted.count(node) > 0) { | |||||
| GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); | GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); | ||||
| continue; | continue; | ||||
| } | } | ||||
| AddNextIterNodes(node->GetOutNodes(), nodes, nodes_seen, nodes_last); | |||||
| AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last); | |||||
| auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); | |||||
| auto ret = RunPasses(node, names_to_passes, during_pass_node_set); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", | GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", | ||||
| node->GetName().c_str(), node->GetType().c_str(), ret); | node->GetName().c_str(), node->GetType().c_str(), ret); | ||||
| @@ -227,7 +241,7 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { | |||||
| if (has_sub_graph) { | if (has_sub_graph) { | ||||
| GELOGD("There are subgraphs on node %s, run passes for for the second time", node->GetName().c_str()); | GELOGD("There are subgraphs on node %s, run passes for for the second time", node->GetName().c_str()); | ||||
| SetFlagOption(kOptimizeAfterSubGraph, names_to_passes); | SetFlagOption(kOptimizeAfterSubGraph, names_to_passes); | ||||
| ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); | |||||
| ret = RunPasses(node, names_to_passes, during_pass_node_set); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", | GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", | ||||
| node->GetName().c_str(), node->GetType().c_str(), ret); | node->GetName().c_str(), node->GetType().c_str(), ret); | ||||
| @@ -239,16 +253,21 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { | |||||
| // should be called each time at the beginning of the iteration | // should be called each time at the beginning of the iteration | ||||
| ClearOption(names_to_passes); | ClearOption(names_to_passes); | ||||
| } | } | ||||
| for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) { | |||||
| GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str()); | |||||
| nodes.push_front(node); | |||||
| } | |||||
| during_pass_node_set.nodes_re_pass_immediately.clear(); | |||||
| } | } | ||||
| for (auto &node : nodes_last) { | |||||
| bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen); | |||||
| if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) { | |||||
| nodes.push(node); | |||||
| for (auto &node : during_pass_node_set.nodes_last) { | |||||
| bool all_in_nodes_seen = node->IsAllInNodesSeen(during_pass_node_set.nodes_seen); | |||||
| if (all_in_nodes_seen && during_pass_node_set.nodes_seen.insert(node.get()).second) { | |||||
| nodes.push_back(node); | |||||
| } | } | ||||
| } | } | ||||
| nodes_last.clear(); | |||||
| } while ((!nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes); | |||||
| during_pass_node_set.nodes_last.clear(); | |||||
| } while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes); | |||||
| if (re_pass_times == kMaxRePassTimes) { | if (re_pass_times == kMaxRePassTimes) { | ||||
| GELOGW("re_pass_times should not come to %d", kMaxRePassTimes); | GELOGW("re_pass_times should not come to %d", kMaxRePassTimes); | ||||
| @@ -53,6 +53,8 @@ class BaseNodePass { | |||||
| std::unordered_set<NodePtr> GetNodesNeedRePass() { return nodes_need_re_pass_; } | std::unordered_set<NodePtr> GetNodesNeedRePass() { return nodes_need_re_pass_; } | ||||
| std::unordered_set<NodePtr> GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; } | |||||
| std::unordered_set<NodePtr> GetNodesDeleted() { return nodes_deleted_; } | std::unordered_set<NodePtr> GetNodesDeleted() { return nodes_deleted_; } | ||||
| void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; } | void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; } | ||||
| @@ -62,6 +64,7 @@ class BaseNodePass { | |||||
| void init() { | void init() { | ||||
| nodes_need_re_pass_.clear(); | nodes_need_re_pass_.clear(); | ||||
| nodes_deleted_.clear(); | nodes_deleted_.clear(); | ||||
| nodes_need_re_pass_immediately_.clear(); | |||||
| } | } | ||||
| protected: | protected: | ||||
| @@ -79,6 +82,14 @@ class BaseNodePass { | |||||
| /// | /// | ||||
| void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); } | void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); } | ||||
| /// | |||||
| /// Add a node to be optimized immediately again. If you add a new node to the graph, or | |||||
| /// change a node's connections, and you want to make sure the node will be | |||||
| /// optimized by other passes, call this function. | |||||
| /// @param node | |||||
| /// | |||||
| void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); } | |||||
| /// | /// | ||||
| /// Add a node and it's input/output data nodes to be optimized again. | /// Add a node and it's input/output data nodes to be optimized again. | ||||
| /// @param node | /// @param node | ||||
| @@ -109,6 +120,7 @@ class BaseNodePass { | |||||
| private: | private: | ||||
| std::unordered_set<NodePtr> nodes_need_re_pass_; | std::unordered_set<NodePtr> nodes_need_re_pass_; | ||||
| std::unordered_set<NodePtr> nodes_need_re_pass_immediately_; | |||||
| std::unordered_set<NodePtr> nodes_deleted_; | std::unordered_set<NodePtr> nodes_deleted_; | ||||
| std::map<NodePassOption, std::string> options_; | std::map<NodePassOption, std::string> options_; | ||||
| }; | }; | ||||
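From a pass author's point of view, the new hook mirrors AddRePassNode: nodes added through AddImmediateRePassNode land in nodes_need_re_pass_immediately_ and are re-visited before the rest of the worklist rather than on the next full round. A hypothetical pass using it might look like this (MyRewirePass and its helper are illustrative, not part of this change; the GE headers providing BaseNodePass, NodePtr, Status are assumed):

```cpp
// Illustrative sketch of a pass that rewires a node's inputs and asks for
// the node to be re-visited right away.
class MyRewirePass : public BaseNodePass {
 public:
  Status Run(NodePtr &node) override {
    if (RewireInputs(node)) {
      // The node's connections changed; re-run the remaining passes on it
      // before the engine moves on, instead of waiting for the next round
      // (which is what AddRePassNode would do).
      AddImmediateRePassNode(node);
    }
    return SUCCESS;
  }

 private:
  bool RewireInputs(NodePtr &node);  // hypothetical helper
};
```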
| @@ -25,6 +25,7 @@ | |||||
| namespace ge { | namespace ge { | ||||
| Status InferShapePass::Run(NodePtr &node) { | Status InferShapePass::Run(NodePtr &node) { | ||||
| // the presence of kOptimizeAfterSubGraph means this run happens after subgraph partitioning | |||||
| auto ret = ShapeRefiner::InferShapeAndType(node, !OptionExists(kOptimizeAfterSubGraph)); | auto ret = ShapeRefiner::InferShapeAndType(node, !OptionExists(kOptimizeAfterSubGraph)); | ||||
| if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
| // select INFERSHAPE failed info | // select INFERSHAPE failed info | ||||
| @@ -41,6 +42,20 @@ Status InferShapePass::Run(NodePtr &node) { | |||||
| GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str()); | GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str()); | ||||
| return GE_GRAPH_INFERSHAPE_FAILED; | return GE_GRAPH_INFERSHAPE_FAILED; | ||||
| } | } | ||||
| bool need_repass = false; | |||||
| auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), "need_infer_again_", need_repass); | |||||
| if (has_attr) { | |||||
| if (!OptionExists(kOptimizeAfterSubGraph)) { | |||||
| return SUCCESS; | |||||
| } | |||||
| if (need_repass) { | |||||
| AddImmediateRePassNode(node); | |||||
| GELOGD("Node %s need repass immediately.", node->GetName().c_str()); | |||||
| } else { | |||||
| // clear attr on while | |||||
| node->GetOpDesc()->DelAttr("need_infer_again_"); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
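The added block is one side of a handshake: shape inference on a While node sets the boolean attribute need_infer_again_ when the loop's shapes have not yet converged, and this pass reacts to it only after subgraph optimization. The decision logic, condensed into a standalone function (after_subgraph stands for OptionExists(kOptimizeAfterSubGraph)):

```cpp
// Condensed from the diff: what InferShapePass does with "need_infer_again_".
enum class Action {
  kNone,               // attribute absent, or still before the subgraph stage
  kRePassImmediately,  // loop not converged: schedule an immediate re-pass
  kClearAttr           // converged: remove the attribute from the While node
};

Action DecideRePass(bool has_attr, bool need_repass, bool after_subgraph) {
  if (!has_attr || !after_subgraph) {
    return Action::kNone;
  }
  return need_repass ? Action::kRePassImmediately : Action::kClearAttr;
}
```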
| @@ -1772,8 +1772,8 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) { | |||||
| if (dim < UNKNOWN_DIM_NUM) { | if (dim < UNKNOWN_DIM_NUM) { | ||||
| std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]" ; | std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]" ; | ||||
| std::string reason = "it need >= -2"; | std::string reason = "it need >= -2"; | ||||
| REPORT_INPUT_ERROR( | |||||
| "E19025", std::vector<std::string>({"situation", "reason"}),std::vector<std::string>({situation, reason})); | |||||
| REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"situation", "reason"}), | |||||
| std::vector<std::string>({situation, reason})); | |||||
| GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim]data dim %zu is not supported, need >= -2, real:%ld.", i, dim); | GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim]data dim %zu is not supported, need >= -2, real:%ld.", i, dim); | ||||
| return GE_GRAPH_INIT_FAILED; | return GE_GRAPH_INIT_FAILED; | ||||
| } | } | ||||
| @@ -212,7 +212,7 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), | |||||
| GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), | |||||
| PARAM_INVALID, | PARAM_INVALID, | ||||
| "Can not config part of outputs of Data node to support AIPP, config all " | "Can not config part of outputs of Data node to support AIPP, config all " | ||||
| "of the outputs of Data to support AIPP, or config none of them"); | "of the outputs of Data to support AIPP, or config none of them"); | ||||
| @@ -407,7 +407,8 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe | |||||
| // check input data type | // check input data type | ||||
| auto x_data_type = tensor0->GetTensorDesc().GetDataType(); | auto x_data_type = tensor0->GetTensorDesc().GetDataType(); | ||||
| if (supported_type.find(x_data_type) == supported_type.end()) { | if (supported_type.find(x_data_type) == supported_type.end()) { | ||||
| GELOGI("GatherV2Kernel does not support this Data type:%s.", TypeUtils::DataTypeToSerialString(x_data_type).c_str()); | |||||
| GELOGI("GatherV2Kernel does not support this Data type:%s.", | |||||
| TypeUtils::DataTypeToSerialString(x_data_type).c_str()); | |||||
| return NOT_CHANGED; | return NOT_CHANGED; | ||||
| } | } | ||||
| // calc output shape | // calc output shape | ||||
| @@ -67,6 +67,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis | |||||
| future_ = std::async(std::launch::async, [&]() -> Status { | future_ = std::async(std::launch::async, [&]() -> Status { | ||||
| GetThreadLocalContext() = *executor_->GetContext()->ge_context; | GetThreadLocalContext() = *executor_->GetContext()->ge_context; | ||||
| GetContext().SetSessionId(executor_->GetContext()->session_id); | GetContext().SetSessionId(executor_->GetContext()->session_id); | ||||
| GetContext().SetContextId(executor_->GetContext()->context_id); | |||||
| return RunInternal(); | return RunInternal(); | ||||
| }); | }); | ||||
| @@ -105,7 +106,7 @@ Status HybridModelAsyncExecutor::Init() { | |||||
| executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); | executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); | ||||
| GE_CHECK_NOTNULL(executor_); | GE_CHECK_NOTNULL(executor_); | ||||
| GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); | GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); | ||||
| GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine"); | |||||
| GE_CHK_STATUS_RET(DumpOpDebug(), "Dump op debug failed in hybrid engine"); | |||||
| GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups()); | GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups()); | ||||
| if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) { | if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) { | ||||
| @@ -166,6 +167,7 @@ Status HybridModelAsyncExecutor::RunInternal() { | |||||
| } else { | } else { | ||||
| GELOGI("HybridModel will execute in singleline mode"); | GELOGI("HybridModel will execute in singleline mode"); | ||||
| ge::GetContext().SetSessionId(executor_->GetContext()->session_id); | ge::GetContext().SetSessionId(executor_->GetContext()->session_id); | ||||
| ge::GetContext().SetContextId(executor_->GetContext()->context_id); | |||||
| ret = executor_->Execute(args); | ret = executor_->Execute(args); | ||||
| } | } | ||||
| ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); | ||||
| @@ -35,12 +35,14 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( | |||||
| node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
| this->num_pending_shapes_); | this->num_pending_shapes_); | ||||
| for (int i = 0; i < node_item.num_inputs; ++i){ | |||||
| input_tensor_desc.emplace_back(*node_item.MutableInputDesc(i)); | |||||
| input_tensor_desc.resize(node_item.num_inputs); | |||||
| for (int i = 0; i < node_item.num_inputs; ++i) { | |||||
| node_item.GetInputDesc(i, input_tensor_desc[i]); | |||||
| } | } | ||||
| for (int i = 0; i < node_item.num_outputs; ++i){ | |||||
| output_tensor_desc.emplace_back(*node_item.MutableOutputDesc(i)); | |||||
| output_tensor_desc.resize(node_item.num_outputs); | |||||
| for (int i = 0; i < node_item.num_outputs; ++i) { | |||||
| node_item.GetOutputDesc(i, output_tensor_desc[i]); | |||||
| } | } | ||||
| } | } | ||||
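ShapeInferenceState now snapshots the input/output descs by value through the new locked accessors instead of dereferencing MutableInputDesc/MutableOutputDesc pointers, so a concurrent desc update cannot race with the copy. The resize-then-fill pattern in isolation (std::string stands in for GeTensorDesc; the accessor is a stub for NodeItem::GetInputDesc):

```cpp
#include <string>
#include <vector>

// Stub for NodeItem::GetInputDesc(index, out): copies out under the item's mutex.
bool GetInputDescCopy(int index, std::string &out) {
  out = "desc#" + std::to_string(index);
  return true;
}

std::vector<std::string> SnapshotInputDescs(int num_inputs) {
  std::vector<std::string> input_tensor_desc;
  // Pre-size once, then fill each slot through the copying accessor. The old
  // code did emplace_back(*MutableInputDesc(i)), reading through a pointer
  // that another thread may be writing at the same time.
  input_tensor_desc.resize(num_inputs);
  for (int i = 0; i < num_inputs; ++i) {
    GetInputDescCopy(i, input_tensor_desc[i]);
  }
  return input_tensor_desc;
}
```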
| @@ -227,6 +227,7 @@ Status SubgraphExecutor::PrepareNodes(int group) { | |||||
| if (node_item.is_dynamic) { | if (node_item.is_dynamic) { | ||||
| auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { | auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { | ||||
| GetContext().SetSessionId(context_->session_id); | GetContext().SetSessionId(context_->session_id); | ||||
| GetContext().SetContextId(context_->context_id); | |||||
| GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); | GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); | ||||
| return PrepareForExecution(context_, *p_node_state); | return PrepareForExecution(context_, *p_node_state); | ||||
| }); | }); | ||||
| @@ -273,10 +274,8 @@ Status SubgraphExecutor::PrepareNodes(int group) { | |||||
| } | } | ||||
| Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { | Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const { | ||||
| GetContext().SetSessionId(context_->context_id); | |||||
| HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), | HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), | ||||
| "[%s] Failed to InferShape.", node_state.GetName().c_str()); | "[%s] Failed to InferShape.", node_state.GetName().c_str()); | ||||
| GetContext().SetSessionId(context_->session_id); | |||||
| HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), | HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state), | ||||
| "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
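Several hunks in this area follow one theme: the old code smuggled context_id through SetSessionId around InferShape and restored the session id afterwards, while the new code deletes that swap and instead calls SetContextId at every point where a new thread or thread-pool job starts. Because the GE context is thread-local, each worker must copy both ids before doing anything else. A minimal model of the pattern, with a toy GetContext standing in for ge::GetContext():

```cpp
#include <cstdint>
#include <future>

// Minimal stand-in for ge::GetContext(): one context object per thread.
struct ThreadContext {
  uint64_t session_id = 0;
  uint64_t context_id = 0;
  void SetSessionId(uint64_t id) { session_id = id; }
  void SetContextId(uint64_t id) { context_id = id; }
};

ThreadContext &GetContext() {
  thread_local ThreadContext ctx;  // each thread sees its own copy
  return ctx;
}

std::future<int> LaunchWorker(uint64_t session_id, uint64_t context_id) {
  return std::async(std::launch::async, [=]() -> int {
    // The thread-local context starts empty in the new thread, so both ids
    // must be propagated explicitly, exactly as the diff adds SetContextId
    // next to every existing SetSessionId.
    GetContext().SetSessionId(session_id);
    GetContext().SetContextId(context_id);
    return 0;  // ... do the actual work
  });
}
```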
| @@ -345,6 +344,7 @@ Status SubgraphExecutor::ScheduleTasks(int group) { | |||||
| GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str()); | GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str()); | ||||
| auto prepare_future = std::async(std::launch::async, [&]() -> Status { | auto prepare_future = std::async(std::launch::async, [&]() -> Status { | ||||
| GetContext().SetSessionId(context_->session_id); | GetContext().SetSessionId(context_->session_id); | ||||
| GetContext().SetContextId(context_->context_id); | |||||
| auto ret = PrepareNodes(group); | auto ret = PrepareNodes(group); | ||||
| ready_queue_.Push(nullptr); | ready_queue_.Push(nullptr); | ||||
| return ret; | return ret; | ||||
| @@ -135,6 +135,7 @@ class HybridModel { | |||||
| std::string model_name_; | std::string model_name_; | ||||
| GeRootModelPtr ge_root_model_; | GeRootModelPtr ge_root_model_; | ||||
| std::map<uint32_t, NodeItem *> input_nodes_; | std::map<uint32_t, NodeItem *> input_nodes_; | ||||
| ComputeGraphPtr root_graph_; | |||||
| std::map<std::string, NodePtr> device_variable_nodes_; //lint !e148 | std::map<std::string, NodePtr> device_variable_nodes_; //lint !e148 | ||||
| std::map<std::string, NodePtr> host_variable_nodes_; //lint !e148 | std::map<std::string, NodePtr> host_variable_nodes_; //lint !e148 | ||||
| std::map<std::string, std::unique_ptr<TensorValue>> variable_tensors_; | std::map<std::string, std::unique_ptr<TensorValue>> variable_tensors_; | ||||
| @@ -136,12 +136,12 @@ Status HybridModelBuilder::Build() { | |||||
| GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), "[%s] Failed to RecoverGraphUnknownFlag", GetGraphName()); | GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), "[%s] Failed to RecoverGraphUnknownFlag", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(IndexSpecialNodes(), "[%s] Failed to index nodes", GetGraphName()); | GE_CHK_STATUS_RET(IndexSpecialNodes(), "[%s] Failed to index nodes", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(IndexTaskDefs(), "[%s] Failed to index task defs", GetGraphName()); | GE_CHK_STATUS_RET(IndexTaskDefs(), "[%s] Failed to index task defs", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName()); | GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(AssignUninitializedConstantOps(), "[%s] Failed to assign uninitialized constants", GetGraphName()); | GE_CHK_STATUS_RET(AssignUninitializedConstantOps(), "[%s] Failed to assign uninitialized constants", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(TransAllVarData(), "[%s] Failed to trans all var data", GetGraphName()); | GE_CHK_STATUS_RET(TransAllVarData(), "[%s] Failed to trans all var data", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(CopyVarData(), "[%s] Failed to copy var data", GetGraphName()); | GE_CHK_STATUS_RET(CopyVarData(), "[%s] Failed to copy var data", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(InitModelMem(), "[%s] Failed to init memory", GetGraphName()); | GE_CHK_STATUS_RET(InitModelMem(), "[%s] Failed to init memory", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); | |||||
| GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); | GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); | GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); | ||||
| GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); | GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); | ||||
| @@ -599,9 +599,10 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph) { | |||||
| Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph) { | |||||
| merged_graph = MakeShared<ComputeGraph>("MergedGraph"); | merged_graph = MakeShared<ComputeGraph>("MergedGraph"); | ||||
| for (const auto &node : root_graph.GetDirectNode()) { | |||||
| merged_graph->SetGraphUnknownFlag(root_graph->GetGraphUnknownFlag()); | |||||
| for (const auto &node : root_graph->GetDirectNode()) { | |||||
| GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
| auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
| GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
| @@ -631,7 +632,7 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, *merged_graph, *subgraph), | |||||
| GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, merged_graph, *subgraph), | |||||
| "[%s] Failed to merge subgraph.", | "[%s] Failed to merge subgraph.", | ||||
| subgraph->GetName().c_str()); | subgraph->GetName().c_str()); | ||||
| } | } | ||||
| @@ -647,18 +648,19 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap | |||||
| return a_level < b_level; | return a_level < b_level; | ||||
| }); | }); | ||||
| for (auto &remained_subgraph : root_graph.GetAllSubgraphs()) { | |||||
| for (auto &remained_subgraph : root_graph->GetAllSubgraphs()) { | |||||
| GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str()); | GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str()); | ||||
| GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph), | GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph), | ||||
| "Failed to add subgraph [%s]", | "Failed to add subgraph [%s]", | ||||
| remained_subgraph->GetName().c_str()); | remained_subgraph->GetName().c_str()); | ||||
| remained_subgraph->SetParentGraph(merged_graph); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, | |||||
| ComputeGraph &parent_graph, | |||||
| Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph, | |||||
| ComputeGraphPtr &parent_graph, | |||||
| ComputeGraph &sub_graph) { | ComputeGraph &sub_graph) { | ||||
| auto parent_node = sub_graph.GetParentNode(); | auto parent_node = sub_graph.GetParentNode(); | ||||
| GE_CHECK_NOTNULL(parent_node); | GE_CHECK_NOTNULL(parent_node); | ||||
| @@ -687,15 +689,23 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, | |||||
| } | } | ||||
| } | } | ||||
| parent_graph.AddNode(sub_node); | |||||
| if (!sub_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) { | |||||
| for (size_t i = 0; i < sub_node->GetOpDesc()->GetSubgraphInstanceNames().size(); ++i) { | |||||
| auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, i); | |||||
| GE_CHECK_NOTNULL(sub_sub_graph); | |||||
| sub_sub_graph->SetParentGraph(parent_graph); | |||||
| } | |||||
| } | |||||
| parent_graph->AddNode(sub_node); | |||||
| GELOGD("[%s::%s] added to parent graph: [%s].", | GELOGD("[%s::%s] added to parent graph: [%s].", | ||||
| sub_graph.GetName().c_str(), | sub_graph.GetName().c_str(), | ||||
| sub_node->GetName().c_str(), | sub_node->GetName().c_str(), | ||||
| parent_graph.GetName().c_str()); | |||||
| parent_graph->GetName().c_str()); | |||||
| sub_node->SetOwnerComputeGraph(parent_graph); | |||||
| } | } | ||||
| GELOGD("[%s] Done merging subgraph. remove it from root graph.", sub_graph.GetName().c_str()); | GELOGD("[%s] Done merging subgraph. remove it from root graph.", sub_graph.GetName().c_str()); | ||||
| root_graph.RemoveSubgraph(sub_graph.GetName()); | |||||
| root_graph->RemoveSubgraph(sub_graph.GetName()); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
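Beyond switching the signatures to ComputeGraphPtr, UnfoldSubgraph now repairs every ownership back-pointer while it moves nodes: nested subgraphs of a moved node are re-parented onto the merged graph, and the node's owner graph is updated; UnfoldSubgraphs likewise re-parents each remaining subgraph. The unit test added at the bottom of this change asserts exactly these links. The invariant, in a toy graph model (types and names here are illustrative, not GE's):

```cpp
#include <memory>
#include <vector>

// Toy graph model: just enough to show the ownership links the diff repairs.
struct Graph;
struct Node {
  std::vector<std::shared_ptr<Graph>> subgraphs;  // nested control-flow bodies
  std::weak_ptr<Graph> owner;                     // graph the node lives in
};

struct Graph {
  std::vector<std::shared_ptr<Node>> nodes;
  std::weak_ptr<Graph> parent;
};

// Move `node` into `merged`, fixing every back-pointer. Forgetting either
// assignment leaves a dangling parent, which is what the added
// SetParentGraph / SetOwnerComputeGraph calls prevent.
void MoveNode(const std::shared_ptr<Node> &node, const std::shared_ptr<Graph> &merged) {
  for (auto &nested : node->subgraphs) {
    nested->parent = merged;      // sub_sub_graph->SetParentGraph(parent_graph)
  }
  merged->nodes.push_back(node);  // parent_graph->AddNode(sub_node)
  node->owner = merged;           // sub_node->SetOwnerComputeGraph(parent_graph)
}
```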
| @@ -747,14 +757,14 @@ Status HybridModelBuilder::LoadGraph() { | |||||
| GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", | GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", | ||||
| root_graph->GetDirectNodesSize(), | root_graph->GetDirectNodesSize(), | ||||
| root_graph->GetAllNodesSize()); | root_graph->GetAllNodesSize()); | ||||
| GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(*root_graph, merged_graph), "Failed to unfold subgraphs."); | |||||
| GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph), "Failed to unfold subgraphs."); | |||||
| root_graph = std::move(merged_graph); | root_graph = std::move(merged_graph); | ||||
| GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", | GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", | ||||
| root_graph->GetDirectNodesSize(), | root_graph->GetDirectNodesSize(), | ||||
| root_graph->GetAllNodesSize()); | root_graph->GetAllNodesSize()); | ||||
| } | } | ||||
| root_graph_ = root_graph; | |||||
| hybrid_model_.root_graph_ = root_graph; | |||||
| // Reset node id by topological order across all subgraphs | // Reset node id by topological order across all subgraphs | ||||
| int64_t index = 0; | int64_t index = 0; | ||||
| for (const auto &node : root_graph->GetAllNodes()) { | for (const auto &node : root_graph->GetAllNodes()) { | ||||
| @@ -1030,9 +1040,13 @@ Status HybridModelBuilder::InitWeights() { | |||||
| GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | ||||
| weight_base, | weight_base, | ||||
| sub_weight_buffer->GetSize()); | sub_weight_buffer->GetSize()); | ||||
| auto root_graph = GraphUtils::GetComputeGraph(subgraph_model.second->GetGraph()); | |||||
| hybrid_model_.weight_buffer_map_.emplace(root_graph->GetName(),std::move(sub_weight_buffer)); | |||||
| for (auto &node : root_graph->GetDirectNode()) { | |||||
| auto subgraph = GraphUtils::GetComputeGraph(subgraph_model.second->GetGraph()); | |||||
| if (subgraph != ge_root_model_->GetRootGraph()) { | |||||
| subgraph = ge_root_model_->GetRootGraph()->GetSubgraph(subgraph_model.first); | |||||
| } | |||||
| GE_CHECK_NOTNULL(subgraph); | |||||
| hybrid_model_.weight_buffer_map_.emplace(subgraph->GetName(), std::move(sub_weight_buffer)); | |||||
| for (auto &node : subgraph->GetDirectNode()) { | |||||
| if (node->GetType() != CONSTANT) { | if (node->GetType() != CONSTANT) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -2044,7 +2058,7 @@ Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) { | |||||
| GELOGD("[%s] Start to get parallel group from subgraph: %s", | GELOGD("[%s] Start to get parallel group from subgraph: %s", | ||||
| node_item->NodeName().c_str(), | node_item->NodeName().c_str(), | ||||
| subgraph_name.c_str()); | subgraph_name.c_str()); | ||||
| auto subgraph = root_graph_->GetSubgraph(subgraph_name); | |||||
| auto subgraph = hybrid_model_.root_graph_->GetSubgraph(subgraph_name); | |||||
| GE_CHECK_NOTNULL(subgraph); | GE_CHECK_NOTNULL(subgraph); | ||||
| for (const auto &sub_node : subgraph->GetAllNodes()) { | for (const auto &sub_node : subgraph->GetAllNodes()) { | ||||
| std::string parallel_group; | std::string parallel_group; | ||||
| @@ -47,8 +47,8 @@ class HybridModelBuilder { | |||||
| static Status HandleDtString(const GeTensor &tensor, void *var_addr); | static Status HandleDtString(const GeTensor &tensor, void *var_addr); | ||||
| static Status MergeInputNodes(ComputeGraph &compute_graph); | static Status MergeInputNodes(ComputeGraph &compute_graph); | ||||
| static Status MergeNetOutputNode(ComputeGraph &compute_graph); | static Status MergeNetOutputNode(ComputeGraph &compute_graph); | ||||
| static Status UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph); | |||||
| static Status UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, ComputeGraph &sub_graph); | |||||
| static Status UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph); | |||||
| static Status UnfoldSubgraph(ComputeGraphPtr &root_graph, ComputeGraphPtr &parent_graph, ComputeGraph &sub_graph); | |||||
| static Status BuildInputMapping(GraphItem &graph_item, | static Status BuildInputMapping(GraphItem &graph_item, | ||||
| std::vector<NodeItem *> &data_nodes, | std::vector<NodeItem *> &data_nodes, | ||||
| bool is_root_graph); | bool is_root_graph); | ||||
| @@ -100,7 +100,6 @@ class HybridModelBuilder { | |||||
| NodeItem *MutableNodeItem(const NodePtr &node); | NodeItem *MutableNodeItem(const NodePtr &node); | ||||
| GeRootModelPtr ge_root_model_; | GeRootModelPtr ge_root_model_; | ||||
| ComputeGraphPtr root_graph_; | |||||
| std::map<std::string, GeModelPtr> subgraph_models_; | std::map<std::string, GeModelPtr> subgraph_models_; | ||||
| std::map<std::string, NodePtr> constant_op_nodes_; | std::map<std::string, NodePtr> constant_op_nodes_; | ||||
| std::map<std::string, std::set<NodeItem *>> parallel_group_to_nodes_; | std::map<std::string, std::set<NodeItem *>> parallel_group_to_nodes_; | ||||
| @@ -297,7 +297,7 @@ void NodeItem::SetToDynamic() { | |||||
| } | } | ||||
| } | } | ||||
| GeTensorDescPtr NodeItem::MutableInputDesc(int index) const { | |||||
| GeTensorDescPtr NodeItem::DoGetInputDesc(int index) const { | |||||
| if (!has_optional_inputs) { | if (!has_optional_inputs) { | ||||
| return op_desc->MutableInputDesc(static_cast<uint32_t>(index)); | return op_desc->MutableInputDesc(static_cast<uint32_t>(index)); | ||||
| } | } | ||||
| @@ -314,6 +314,40 @@ GeTensorDescPtr NodeItem::MutableInputDesc(int index) const { | |||||
| return op_desc->MutableInputDesc(input_desc_indices_[index]); | return op_desc->MutableInputDesc(input_desc_indices_[index]); | ||||
| } | } | ||||
| GeTensorDescPtr NodeItem::MutableInputDesc(int index) const { | |||||
| std::lock_guard<std::mutex> lk(mu_); | |||||
| return DoGetInputDesc(index); | |||||
| } | |||||
| Status NodeItem::GetInputDesc(int index, GeTensorDesc &tensor_desc) const { | |||||
| std::lock_guard<std::mutex> lk(mu_); | |||||
| auto input_desc = DoGetInputDesc(index); | |||||
| GE_CHECK_NOTNULL(input_desc); | |||||
| tensor_desc = *input_desc; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status NodeItem::GetOutputDesc(int index, GeTensorDesc &tensor_desc) const { | |||||
| std::lock_guard<std::mutex> lk(mu_); | |||||
| auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(index)); | |||||
| GE_CHECK_NOTNULL(output_desc); | |||||
| tensor_desc = *output_desc; | |||||
| return SUCCESS; | |||||
| } | |||||
| GeTensorDescPtr NodeItem::MutableOutputDesc(int index) const { | |||||
| std::lock_guard<std::mutex> lk(mu_); | |||||
| return op_desc->MutableOutputDesc(static_cast<uint32_t>(index)); | |||||
| } | |||||
| Status NodeItem::UpdateInputDesc(int index, const GeTensorDesc &tensor_desc) { | |||||
| std::lock_guard<std::mutex> lk(mu_); | |||||
| auto input_desc = DoGetInputDesc(index); | |||||
| GE_CHECK_NOTNULL(input_desc); | |||||
| *input_desc = tensor_desc; | |||||
| return SUCCESS; | |||||
| } | |||||
| Status NodeItem::GetCanonicalInputIndex(uint32_t index, int &canonical_index) const { | Status NodeItem::GetCanonicalInputIndex(uint32_t index, int &canonical_index) const { | ||||
| if (!has_optional_inputs) { | if (!has_optional_inputs) { | ||||
| canonical_index = index; | canonical_index = index; | ||||
| @@ -17,6 +17,7 @@ | |||||
| #ifndef GE_HYBRID_MODEL_NODE_ITEM_H_ | #ifndef GE_HYBRID_MODEL_NODE_ITEM_H_ | ||||
| #define GE_HYBRID_MODEL_NODE_ITEM_H_ | #define GE_HYBRID_MODEL_NODE_ITEM_H_ | ||||
| #include <mutex> | |||||
| #include <vector> | #include <vector> | ||||
| #include "external/ge/ge_api_error_codes.h" | #include "external/ge/ge_api_error_codes.h" | ||||
| #include "graph/node.h" | #include "graph/node.h" | ||||
| @@ -57,12 +58,16 @@ struct NodeItem { | |||||
| bool IsInputShapeStatic(int index) const; | bool IsInputShapeStatic(int index) const; | ||||
| GeTensorDescPtr MutableOutputDesc(int index) const { | |||||
| return op_desc->MutableOutputDesc(static_cast<uint32_t>(index)); | |||||
| } | |||||
| GeTensorDescPtr MutableOutputDesc(int index) const; | |||||
| Status UpdateInputDesc(int index, const GeTensorDesc &tensor_desc); | |||||
| GeTensorDescPtr MutableInputDesc(int index) const; | GeTensorDescPtr MutableInputDesc(int index) const; | ||||
| Status GetInputDesc(int index, GeTensorDesc &tensor_desc) const; | |||||
| Status GetOutputDesc(int index, GeTensorDesc &tensor_desc) const; | |||||
| Status GetCanonicalInputIndex(uint32_t index, int &canonical_index) const; | Status GetCanonicalInputIndex(uint32_t index, int &canonical_index) const; | ||||
| bool IsControlOp() const; | bool IsControlOp() const; | ||||
| @@ -113,9 +118,11 @@ struct NodeItem { | |||||
| Status ResolveDynamicState(); | Status ResolveDynamicState(); | ||||
| Status ResolveStaticInputsAndOutputs(); | Status ResolveStaticInputsAndOutputs(); | ||||
| void ResolveUnknownShapeType(); | void ResolveUnknownShapeType(); | ||||
| GeTensorDescPtr DoGetInputDesc(int index) const; | |||||
| std::vector<bool> is_input_shape_static_; | std::vector<bool> is_input_shape_static_; | ||||
| std::vector<uint32_t> input_desc_indices_; | std::vector<uint32_t> input_desc_indices_; | ||||
| mutable std::mutex mu_; | |||||
| }; | }; | ||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
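The NodeItem changes introduce a small reader/writer discipline: a mutable std::mutex lets the const getters lock too, readers receive value copies via GetInputDesc/GetOutputDesc, and writers go through UpdateInputDesc under the same lock. The core pattern in isolation (std::string again stands in for GeTensorDesc):

```cpp
#include <mutex>
#include <string>

// Stand-in for NodeItem: `mutable` lets const readers take the lock,
// matching the `mutable std::mutex mu_` added to the header.
class Item {
 public:
  bool GetDesc(std::string &out) const {
    std::lock_guard<std::mutex> lk(mu_);
    out = desc_;   // copy out under the lock
    return true;
  }
  void UpdateDesc(const std::string &desc) {
    std::lock_guard<std::mutex> lk(mu_);
    desc_ = desc;  // write under the same lock
  }

 private:
  std::string desc_;
  mutable std::mutex mu_;
};
```

Note that the pointer-returning MutableInputDesc/MutableOutputDesc remain for existing callers; they lock only while fetching the pointer, so full thread-safety still depends on callers migrating to the copying API.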
| @@ -307,11 +307,9 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { | |||||
| auto execution_context = context.GetExecutionContext(); | auto execution_context = context.GetExecutionContext(); | ||||
| GetContext().SetSessionId(execution_context->context_id); | |||||
| RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] Start"); | RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] Start"); | ||||
| GE_CHK_STATUS_RET(CalcTilingInfo(node, tiling_info)); | GE_CHK_STATUS_RET(CalcTilingInfo(node, tiling_info)); | ||||
| RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] End"); | RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] End"); | ||||
| GetContext().SetSessionId(execution_context->session_id); | |||||
| // update op args by tiling info | // update op args by tiling info | ||||
| block_dim_ = static_cast<uint32_t>(tiling_info.block_dim); | block_dim_ = static_cast<uint32_t>(tiling_info.block_dim); | ||||
| @@ -105,11 +105,6 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
| "known node task allocate workspace failed."); | "known node task allocate workspace failed."); | ||||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), | ||||
| "[KnownNodeTask_AllocateWorkspace] End, size %zu", davinci_model_->TotalMemSize()); | "[KnownNodeTask_AllocateWorkspace] End, size %zu", davinci_model_->TotalMemSize()); | ||||
| bool addr_not_changed = false; | |||||
| if (davinci_model_->GetRuntimeParam().mem_base == buffer) { | |||||
| addr_not_changed = true; | |||||
| } | |||||
| davinci_model_->SetKnownNodeAddrNotChanged(addr_not_changed); | |||||
| // update mem base | // update mem base | ||||
| davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer)); | davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer)); | ||||
| GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", | GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", | ||||
| @@ -237,8 +237,8 @@ Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::fun | |||||
| } | } | ||||
| bool is_continue = false; | bool is_continue = false; | ||||
| GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), | |||||
| "[%s] Failed to execute iteration 0.", | |||||
| GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), | |||||
| "[%s] Failed to execute cond-subgraph", | |||||
| task_context.GetNodeName()); | task_context.GetNodeName()); | ||||
| if (!is_continue) { | if (!is_continue) { | ||||
| for (int i = 0; i < task_context.NumInputs(); ++i) { | for (int i = 0; i < task_context.NumInputs(); ++i) { | ||||
| @@ -259,42 +259,28 @@ Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::fun | |||||
| } | } | ||||
| // backup original input tensor desc | // backup original input tensor desc | ||||
| std::vector<GeTensorDesc> ori_input_desc; | |||||
| std::vector<GeTensorDesc> ori_input_desc(task_context.NumInputs()); | |||||
| for (int i = 0; i < task_context.NumInputs(); ++i) { | for (int i = 0; i < task_context.NumInputs(); ++i) { | ||||
| auto tensor_desc = task_context.GetInputDesc(i); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| ori_input_desc.emplace_back(*tensor_desc); | |||||
| GE_CHK_STATUS_RET_NOLOG(task_context.GetInputDesc(i, ori_input_desc[i])); | |||||
| } | } | ||||
| int iteration = 1; | |||||
| while (true) { | |||||
| int iteration = 0; | |||||
| while (is_continue) { | |||||
| ++iteration; | |||||
| GELOGD("[%s] Start to execute, iteration = %d", task_context.GetNodeName(), iteration); | GELOGD("[%s] Start to execute, iteration = %d", task_context.GetNodeName(), iteration); | ||||
| GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), | GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), | ||||
| "[%s] Failed to execute iteration %d.", | "[%s] Failed to execute iteration %d.", | ||||
| task_context.GetNodeName(), | task_context.GetNodeName(), | ||||
| iteration); | iteration); | ||||
| if (!is_continue) { | |||||
| GELOGD("[%s] Quit from loop. current iteration = %d", task_context.GetNodeName(), iteration); | |||||
| break; | |||||
| } | |||||
| ++iteration; | |||||
| } | } | ||||
| for (int i = 0; i < task_context.NumInputs(); ++i) { | |||||
| auto input_tensor = task_context.GetInput(i); | |||||
| auto tensor_desc = task_context.MutableInputDesc(i); | |||||
| GE_CHECK_NOTNULL(input_tensor); | |||||
| GE_CHECK_NOTNULL(tensor_desc); | |||||
| // restore original input tensor desc | |||||
| *tensor_desc = std::move(ori_input_desc[i]); | |||||
| GE_CHK_STATUS_RET_NOLOG(task_context.SetOutput(i, *input_tensor)); | |||||
| } | |||||
| GELOGD("[%s] Quit from loop. current iteration = %d", task_context.GetNodeName(), iteration); | |||||
| if (done_callback) { | if (done_callback) { | ||||
| done_callback(); | done_callback(); | ||||
| } | } | ||||
| for (int i = 0; i < task_context.NumInputs(); ++i) { | |||||
| GE_CHK_STATUS_RET_NOLOG(task_context.UpdateInputDesc(i, ori_input_desc[i])); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
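The While task's control flow is inverted by this change: instead of `while (true) { ExecuteOneLoop(); if (!is_continue) break; }` with the cond check buried inside ExecuteOneLoop, the cond subgraph is now evaluated once up front, and each iteration runs the body, moves outputs to inputs, then re-checks cond. A compilable condensation of the new flow (the stand-in functions fake a few successful cond checks; they are not GE calls):

```cpp
#include <cstdio>

// Stand-ins for the GE subgraph executions (illustrative only).
static int remaining = 3;  // pretend cond holds for three checks
bool Cond() { return remaining-- > 0; }
void Body() { std::printf("body\n"); }
void MoveOutputsToInputs() {}
void CopyInputsToOutputs() {}

void RunWhile() {
  bool is_continue = Cond();    // cond once, before any body run
  if (!is_continue) {
    CopyInputsToOutputs();      // zero-iteration case: outputs = inputs
    return;
  }
  int iteration = 0;
  while (is_continue) {
    ++iteration;
    Body();                     // body-subgraph
    MoveOutputsToInputs();      // feed the results into the next round
    is_continue = Cond();       // re-check the cond-subgraph
    if (!is_continue) {
      CopyInputsToOutputs();    // publish the final values as outputs
    }
  }
  std::printf("quit after %d iterations\n", iteration);
}
```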
| @@ -379,13 +365,6 @@ Status WhileOpNodeTask::MoveOutputs2Inputs(TaskContext &task_context) { | |||||
| } | } | ||||
| Status WhileOpNodeTask::ExecuteOneLoop(TaskContext &task_context, bool &is_continue) const { | Status WhileOpNodeTask::ExecuteOneLoop(TaskContext &task_context, bool &is_continue) const { | ||||
| GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), | |||||
| "[%s] Failed to execute cond-subgraph", | |||||
| task_context.GetNodeName()); | |||||
| if (!is_continue) { | |||||
| return SUCCESS; | |||||
| } | |||||
| GELOGD("[%s] Start to execute body-subgraph.", task_context.GetNodeName()); | GELOGD("[%s] Start to execute body-subgraph.", task_context.GetNodeName()); | ||||
| GE_CHK_STATUS_RET(ExecuteSubgraph(body_, task_context, nullptr), | GE_CHK_STATUS_RET(ExecuteSubgraph(body_, task_context, nullptr), | ||||
| "[%s] Failed to execute cond-subgraph", task_context.GetNodeName()); | "[%s] Failed to execute cond-subgraph", task_context.GetNodeName()); | ||||
| @@ -396,6 +375,17 @@ Status WhileOpNodeTask::ExecuteOneLoop(TaskContext &task_context, bool &is_conti | |||||
| "[%s] Failed to move outputs to inputs", | "[%s] Failed to move outputs to inputs", | ||||
| task_context.GetNodeName()); | task_context.GetNodeName()); | ||||
| GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), | |||||
| "[%s] Failed to execute cond-subgraph", | |||||
| task_context.GetNodeName()); | |||||
| if (!is_continue) { | |||||
| for (int i = 0; i < task_context.NumInputs(); ++i) { | |||||
| auto input_desc = task_context.GetInput(i); | |||||
| GE_CHECK_NOTNULL(input_desc); | |||||
| GE_CHK_STATUS_RET_NOLOG(task_context.SetOutput(i, *input_desc)); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -80,7 +80,6 @@ class WhileOpNodeTask : public ControlOpNodeTask { | |||||
| Status ExecuteCond(TaskContext &task_context, bool &is_continue) const; | Status ExecuteCond(TaskContext &task_context, bool &is_continue) const; | ||||
| static Status MoveOutputs2Inputs(TaskContext &task_context); | static Status MoveOutputs2Inputs(TaskContext &task_context); | ||||
| Status ExecuteOneLoop(TaskContext &task_context, bool &is_continue) const; | Status ExecuteOneLoop(TaskContext &task_context, bool &is_continue) const; | ||||
| private: | private: | ||||
| @@ -554,5 +554,16 @@ NodeState *TaskContext::GetNodeState() const { | |||||
| return node_state_; | return node_state_; | ||||
| } | } | ||||
| Status TaskContext::GetInputDesc(int index, GeTensorDesc &tensor_desc) const { | |||||
| return node_item_->GetInputDesc(index, tensor_desc); | |||||
| } | |||||
| Status TaskContext::UpdateInputDesc(int index, const GeTensorDesc &tensor_desc) { | |||||
| return const_cast<NodeItem *>(node_item_)->UpdateInputDesc(index, tensor_desc); | |||||
| } | |||||
| Status TaskContext::GetOutputDesc(int index, GeTensorDesc &tensor_desc) const { | |||||
| return node_item_->GetOutputDesc(index, tensor_desc); | |||||
| } | |||||
| } // namespace hybrid | } // namespace hybrid | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -50,9 +50,12 @@ class TaskContext { | |||||
| const char *GetNodeName() const; | const char *GetNodeName() const; | ||||
| TensorValue *MutableInput(int index); | TensorValue *MutableInput(int index); | ||||
| ConstGeTensorDescPtr GetInputDesc(int index) const; | ConstGeTensorDescPtr GetInputDesc(int index) const; | ||||
| Status GetInputDesc(int index, GeTensorDesc &tensor_desc) const; | |||||
| ConstGeTensorDescPtr GetOutputDesc(int index) const; | ConstGeTensorDescPtr GetOutputDesc(int index) const; | ||||
| Status GetOutputDesc(int index, GeTensorDesc &tensor_desc) const; | |||||
| GeTensorDescPtr MutableInputDesc(int index) const; | GeTensorDescPtr MutableInputDesc(int index) const; | ||||
| GeTensorDescPtr MutableOutputDesc(int index) const; | GeTensorDescPtr MutableOutputDesc(int index) const; | ||||
| Status UpdateInputDesc(int index, const GeTensorDesc &tensor_desc); | |||||
| void ReleaseInputsAndOutputs(); | void ReleaseInputsAndOutputs(); | ||||
| bool NeedCallback(); | bool NeedCallback(); | ||||
| void ReleaseInput(int index); | void ReleaseInput(int index); | ||||
| @@ -34,6 +34,8 @@ const int64_t kDynamicImageSizeNum = 2; | |||||
| const size_t kMaxDynamicDimNum = 100; | const size_t kMaxDynamicDimNum = 100; | ||||
| const size_t kMaxNDDimNum = 4; | const size_t kMaxNDDimNum = 4; | ||||
| const size_t kMinNDDimNum = 1; | const size_t kMinNDDimNum = 1; | ||||
| const size_t kSquareBracketsSize = 2; | |||||
| const size_t kRangePairSize = 2; | |||||
| // datatype/formats from user to GE, Unified to util interface file later | // datatype/formats from user to GE, Unified to util interface file later | ||||
| const std::map<std::string, ge::DataType> kOutputTypeSupportDatatype = { | const std::map<std::string, ge::DataType> kOutputTypeSupportDatatype = { | ||||
| {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; | {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; | ||||
| @@ -292,7 +294,8 @@ bool ParseSingleShapeRange(std::string &shape_range, vector<pair<int64_t, int64_ | |||||
| } | } | ||||
| } | } | ||||
| bool is_square_brackets = (square_brackets[0] == '[') && (square_brackets[1] == ']') && (square_brackets.size() == 2); | |||||
| bool is_square_brackets = (square_brackets[0] == '[') && (square_brackets[1] == ']') && | |||||
| (square_brackets.size() == kSquareBracketsSize); | |||||
| if (!is_square_brackets) { | if (!is_square_brackets) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | ||||
| {shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample2}); | {shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample2}); | ||||
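This hunk only replaces the magic number with kSquareBracketsSize, but the expression deserves a second look: it evaluates square_brackets[0] and [1] before the size comparison, and is safe only because && short-circuits (an empty string yields '\0' for [0], which fails the first test before [1] is ever touched). Hoisting the size check makes the intent explicit; a sketch of the reordered check, offered as a suggestion rather than part of this change:

```cpp
#include <cstddef>
#include <string>

constexpr size_t kSquareBracketsSize = 2;

// Same validation with the size test first, so the subscripts are only
// evaluated on strings long enough to actually hold "[]".
bool IsSquareBrackets(const std::string &square_brackets) {
  return square_brackets.size() == kSquareBracketsSize &&
         square_brackets[0] == '[' && square_brackets[1] == ']';
}
```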
| @@ -320,7 +323,7 @@ bool ParseSingleShapeRange(std::string &shape_range, vector<pair<int64_t, int64_ | |||||
| } else { | } else { | ||||
| range_pair = std::make_pair(range_value, range_value); | range_pair = std::make_pair(range_value, range_value); | ||||
| } | } | ||||
| } else if (range_pair_set.size() == 2) { | |||||
| } else if (range_pair_set.size() == kRangePairSize) { | |||||
| // unknown dim, should get range. | // unknown dim, should get range. | ||||
| long range_left = 0; | long range_left = 0; | ||||
| if (!StringToLongNoThrow(range_pair_set.at(0), range_left)) { | if (!StringToLongNoThrow(range_pair_set.at(0), range_left)) { | ||||
| @@ -332,9 +335,10 @@ bool ParseSingleShapeRange(std::string &shape_range, vector<pair<int64_t, int64_ | |||||
| } | } | ||||
| if (range_left < 0 || (range_right < 0)) { | if (range_left < 0 || (range_right < 0)) { | ||||
| ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | ||||
| {shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample3}); | |||||
| {shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample3}); | |||||
| GELOGE(PARAM_INVALID, | GELOGE(PARAM_INVALID, | ||||
| "Parse input parameter [--input_shape_range]'s shape range[%s] failed, reason: %s, correct sample is %s.", | |||||
| "Parse input parameter [--input_shape_range]'s shape range[%s] failed," | |||||
| "reason: %s, correct sample is %s.", | |||||
| shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample3); | shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample3); | ||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -383,7 +387,7 @@ bool ParseInputShapeRange(const std::string &shape_range, | |||||
| } | } | ||||
| shape_range_map.emplace(make_pair(StringUtils::Trim(shape_range_pair_vec[0]), shape_range_val)); | shape_range_map.emplace(make_pair(StringUtils::Trim(shape_range_pair_vec[0]), shape_range_val)); | ||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -402,7 +406,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||||
| if (param_size == 0) { | if (param_size == 0) { | ||||
| if (!input_shape_range.empty()) { | if (!input_shape_range.empty()) { | ||||
| std::map<string, std::vector<std::pair<int64_t, int64_t>>> shape_range_map; | std::map<string, std::vector<std::pair<int64_t, int64_t>>> shape_range_map; | ||||
| if(!ParseInputShapeRange(input_shape_range, shape_range_map)) { | |||||
| if (!ParseInputShapeRange(input_shape_range, shape_range_map)) { | |||||
| GELOGE(ge::PARAM_INVALID, "Failed to parse input shape range: %s", input_shape_range.c_str()); | GELOGE(ge::PARAM_INVALID, "Failed to parse input shape range: %s", input_shape_range.c_str()); | ||||
| return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
| } | } | ||||
| @@ -793,7 +793,7 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||||
| std::string input_shape_range; | std::string input_shape_range; | ||||
| ParseAtcParms(atc_params, INPUT_SHAPE_RANGE, input_shape_range); | ParseAtcParms(atc_params, INPUT_SHAPE_RANGE, input_shape_range); | ||||
| GE_RETURN_WITH_LOG_IF_ERROR(UpdateDynamicInputShapeRange(compute_graph, input_shape_range), | GE_RETURN_WITH_LOG_IF_ERROR(UpdateDynamicInputShapeRange(compute_graph, input_shape_range), | ||||
| "Update input shape range failed"); | |||||
| "Update input shape range failed"); | |||||
| GELOGI("ATC parser success."); | GELOGI("ATC parser success."); | ||||
| @@ -42,9 +42,9 @@ class GE_FUNC_VISIBILITY GeLog { | |||||
| public: | public: | ||||
| static uint64_t GetTid() { | static uint64_t GetTid() { | ||||
| #ifdef __GNUC__ | #ifdef __GNUC__ | ||||
| thread_local static uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid)); | |||||
| uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid)); | |||||
| #else | #else | ||||
| thread_local static uint64_t tid = static_cast<uint64_t>(GetCurrentThreadId()); | |||||
| uint64_t tid = static_cast<uint64_t>(GetCurrentThreadId()); | |||||
| #endif | #endif | ||||
| return tid; | return tid; | ||||
| } | } | ||||
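GetTid loses its thread_local static cache, so the gettid syscall now runs on every log line. The diff does not state the motivation; one plausible reason is fork-safety, since a process forked from a thread with a cached tid would keep logging the parent's id. Both variants for comparison (Linux-specific, matching the __GNUC__ branch):

```cpp
#include <cstdint>
#include <sys/syscall.h>
#include <unistd.h>

// Cached: one syscall per thread, but a child process forked from this
// thread would keep reporting the parent's tid.
uint64_t GetTidCached() {
  thread_local static uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid));
  return tid;
}

// Uncached (the behavior after this change): one syscall per call,
// always reflecting the current process and thread.
uint64_t GetTidFresh() {
  return static_cast<uint64_t>(syscall(__NR_gettid));
}
```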
| @@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY StringUtils { | |||||
| return s; | return s; | ||||
| } | } | ||||
| // lint -esym(551,*) | // lint -esym(551,*) | ||||
| static std::string &Rtrim(std::string &s) { /*lint !e618*/ | |||||
| static std::string &Rtrim(std::string &s) { /*lint !e618*/ | |||||
| #if __cplusplus >= 201103L | #if __cplusplus >= 201103L | ||||
| (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); | (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); | ||||
| #else | #else | ||||
| @@ -76,8 +76,8 @@ class GE_FUNC_VISIBILITY StringUtils { | |||||
| /// @param [in] delim separator | /// @param [in] delim separator | ||||
| /// @return string array after segmentation | /// @return string array after segmentation | ||||
| /// | /// | ||||
| static std::vector<std::string> Split(const std::string &str, char delim) { | |||||
| std::vector<std::string> elems; | |||||
| static std::vector<std::string> Split(const std::string &str, char delim) { /*lint !e1077*/ | |||||
| std::vector<std::string> elems; /*lint !e1077*/ | |||||
| if (str.empty()) { | if (str.empty()) { | ||||
| elems.emplace_back(""); | elems.emplace_back(""); | ||||
| @@ -1 +1 @@ | |||||
| Subproject commit 0c4602a4615a9368b06633a5087e2114518f29ca | |||||
| Subproject commit 8cf3c51d53a9f4ebd6d601a2383f62788e3b8176 | |||||
| @@ -52,34 +52,34 @@ TEST_F(UtestFormatTransfer, build_unsupported_transfer) { | |||||
| EXPECT_EQ(transfer2, nullptr); | EXPECT_EQ(transfer2, nullptr); | ||||
| } | } | ||||
| // TEST_F(UtestFormatTransfer, get_size_by_data_type) { | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_FLOAT), 4); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_FLOAT16), 2); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_INT8), 1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_INT16), 2); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_UINT16), 2); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_UINT8), 1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_INT32), 4); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_INT64), 8); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_UINT32), 4); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_UINT64), 8); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_BOOL), 1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_DOUBLE), 8); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_STRING), -1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_INT8), 1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_UINT8), 1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_COMPLEX64), 8); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_COMPLEX128), 16); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_QINT8), 1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_QINT16), 2); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_QINT32), 4); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_QUINT8), 1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_QUINT16), 2); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_RESOURCE), -1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_STRING_REF), -1); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_DUAL), 5); | |||||
| // EXPECT_EQ(GetSizeByDataType(DT_UNDEFINED), -1); | |||||
| // EXPECT_EQ(DT_UNDEFINED, 27); | |||||
| // } | |||||
| TEST_F(UtestFormatTransfer, get_size_by_data_type) { | |||||
| EXPECT_EQ(GetSizeByDataType(DT_FLOAT), 4); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_FLOAT16), 2); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_INT8), 1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_INT16), 2); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_UINT16), 2); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_UINT8), 1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_INT32), 4); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_INT64), 8); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_UINT32), 4); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_UINT64), 8); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_BOOL), 1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_DOUBLE), 8); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_STRING), -1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_INT8), 1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_UINT8), 1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_COMPLEX64), 8); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_COMPLEX128), 16); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_QINT8), 1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_QINT16), 2); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_QINT32), 4); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_QUINT8), 1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_QUINT16), 2); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_RESOURCE), 8); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_STRING_REF), -1); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_DUAL), 5); | |||||
| EXPECT_EQ(GetSizeByDataType(DT_UNDEFINED), -1); | |||||
| EXPECT_EQ(DT_UNDEFINED, 28); | |||||
| } | |||||
| } // namespace formats | } // namespace formats | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -141,6 +141,12 @@ TEST_F(UtestDavinciModel, init_success) { | |||||
| ProfilingManager::Instance().is_load_profiling_ = false; | ProfilingManager::Instance().is_load_profiling_ = false; | ||||
| } | } | ||||
| TEST_F(UtestDavinciModel, CheckCapability) { | |||||
| DavinciModel model(0, nullptr); | |||||
| bool is_support = false; | |||||
| (void)model.CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, is_support); | |||||
| } | |||||
| TEST_F(UtestDavinciModel, init_data_op) { | TEST_F(UtestDavinciModel, init_data_op) { | ||||
| DavinciModel model(0, nullptr); | DavinciModel model(0, nullptr); | ||||
| model.ge_model_ = make_shared<GeModel>(); | model.ge_model_ = make_shared<GeModel>(); | ||||
| @@ -67,4 +67,22 @@ TEST_F(UtestModelUtils, get_var_addr_rdma_hbm) { | |||||
| EXPECT_EQ(reinterpret_cast<uint8_t *>(offset), var_addr); | EXPECT_EQ(reinterpret_cast<uint8_t *>(offset), var_addr); | ||||
| VarManager::Instance(runtime_param.session_id)->Destory(); | VarManager::Instance(runtime_param.session_id)->Destory(); | ||||
| } | } | ||||
| TEST_F(UtestModelUtils, get_var_addr_rdma_hbm_negative_offset) { | |||||
| uint8_t test = 2; | |||||
| uint8_t *pf = &test; | |||||
| RuntimeParam runtime_param; | |||||
| runtime_param.session_id = 0; | |||||
| runtime_param.logic_var_base = 0; | |||||
| runtime_param.var_base = pf; | |||||
| int64_t offset = -1; | |||||
| EXPECT_EQ(VarManager::Instance(runtime_param.session_id)->Init(0, 0, 0, 0), SUCCESS); | |||||
| EXPECT_NE(VarManager::Instance(runtime_param.session_id)->var_resource_, nullptr); | |||||
| VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; | |||||
| std::shared_ptr<OpDesc> op_desc = std::make_shared<OpDesc>("test", "test"); | |||||
| uint8_t *var_addr = nullptr; | |||||
| EXPECT_NE(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); | |||||
| VarManager::Instance(runtime_param.session_id)->Destory(); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -256,3 +256,77 @@ TEST_F(UtestGeHybrid, init_weight_success) { | |||||
| HybridModelExecutor executor(model_ptr, device_id, stream); | HybridModelExecutor executor(model_ptr, device_id, stream); | ||||
| executor.Init(); | executor.Init(); | ||||
| } | } | ||||
| TEST_F(UtestGeHybrid, unfold_subgraphs_success) { | |||||
| ComputeGraphPtr merged_graph = nullptr; | |||||
| ComputeGraphPtr sub_sub_graph1 = std::make_shared<ComputeGraph>("while_cond"); | |||||
| OpDescPtr sub_sub_graph_while_cond_data_op_desc = CreateOpDesc("cond_data", DATA); | |||||
| NodePtr sub_sub_graph_while_cond_data_node = sub_sub_graph1->AddNode(sub_sub_graph_while_cond_data_op_desc); | |||||
| ComputeGraphPtr sub_sub_graph2 = std::make_shared<ComputeGraph>("while_body"); | |||||
| /*OpDescPtr sub_sub_graph_while_body_const_op_desc = CreateOpDesc("body_const", CONSTANT); | |||||
| NodePtr sub_sub_graph_while_body_const_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_const_op_desc);*/ | |||||
| OpDescPtr sub_sub_graph_while_body_data_op_desc = CreateOpDesc("body_data", DATA); | |||||
| NodePtr sub_sub_graph_while_body_data_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_data_op_desc); | |||||
| sub_sub_graph2->SetGraphUnknownFlag(true); | |||||
| /*OpDescPtr sub_sub_graph_while_body_add_op_desc = CreateOpDesc("body_add", ADD); | |||||
| NodePtr sub_sub_graph_while_body_add_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_add_node); | |||||
| sub_sub_graph_while_body_add_node->AddLinkFrom(sub_sub_graph_while_body_data_node); | |||||
| sub_sub_graph_while_body_add_node->AddLinkFrom(sub_sub_graph_while_body_const_node);*/ | |||||
| ComputeGraphPtr sub_graph = std::make_shared<ComputeGraph>("sub_graph"); | |||||
| OpDescPtr sub_graph_while_op_desc = CreateOpDesc("while", WHILE); | |||||
| NodePtr sub_graph_while_node = sub_graph->AddNode(sub_graph_while_op_desc); | |||||
| sub_graph->SetGraphUnknownFlag(true); | |||||
| sub_graph_while_node->GetOpDesc()->AddSubgraphName("while_cond"); | |||||
| sub_graph_while_node->GetOpDesc()->AddSubgraphName("while_body"); | |||||
| sub_graph_while_node->GetOpDesc()->SetSubgraphInstanceName(0, "while_cond"); | |||||
| sub_graph_while_node->GetOpDesc()->SetSubgraphInstanceName(1, "while_body"); | |||||
| ComputeGraphPtr root_graph = std::make_shared<ComputeGraph>("root_graph"); | |||||
| auto partitioned_call_op_desc = MakeShared<OpDesc>("partitioned_call", PARTITIONEDCALL); | |||||
| auto partitioned_call_node = root_graph->AddNode(partitioned_call_op_desc); | |||||
| partitioned_call_node->GetOpDesc()->AddSubgraphName("sub_graph"); | |||||
| partitioned_call_node->GetOpDesc()->SetSubgraphInstanceName(0, "sub_graph"); | |||||
| root_graph->AddSubGraph(sub_sub_graph1); | |||||
| root_graph->AddSubGraph(sub_sub_graph2); | |||||
| sub_sub_graph1->SetParentGraph(root_graph); | |||||
| sub_sub_graph2->SetParentGraph(root_graph); | |||||
| sub_sub_graph1->SetParentNode(sub_graph_while_node); | |||||
| sub_sub_graph2->SetParentNode(sub_graph_while_node); | |||||
| root_graph->AddSubGraph(sub_graph); | |||||
| sub_graph->SetParentNode(partitioned_call_node); | |||||
| sub_graph->SetParentGraph(root_graph); | |||||
| GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(root_graph); | |||||
| HybridModel hybrid_model(root_model); | |||||
| HybridModelBuilder hybrid_model_builder(hybrid_model); | |||||
| // subgraph num before unfold: 3 | |||||
| EXPECT_EQ(root_graph->GetAllSubgraphs().size(), 3); | |||||
| // num of nodes in root_graph before unfold: 1, name: partitioned_call | |||||
| EXPECT_EQ(root_graph->GetDirectNodesSize(), 1); | |||||
| EXPECT_EQ(root_graph->GetDirectNode().at(0)->GetName(), "partitioned_call"); | |||||
| // two sub_sub_graphs: while cond & while body, their parent graph is "subgraph" before unfold | |||||
| EXPECT_EQ(sub_sub_graph1->GetParentGraph()->GetName(), "root_graph"); | |||||
| EXPECT_EQ(sub_sub_graph2->GetParentGraph()->GetName(), "root_graph"); | |||||
| // node "cond_data" & "body_data" has owner compute graph "subgraph" before unfold | |||||
| EXPECT_EQ(sub_graph_while_node->GetOwnerComputeGraph()->GetName(), "sub_graph"); | |||||
| // unfold success | |||||
| EXPECT_EQ(hybrid_model_builder.UnfoldSubgraphs(root_graph, merged_graph), SUCCESS); | |||||
| // subgraph num after unfold: 2 | |||||
| EXPECT_EQ(merged_graph->GetAllSubgraphs().size(), 2); | |||||
| // num of nodes in MergedGraph after unfold: 1, name: while | |||||
| EXPECT_EQ(merged_graph->GetDirectNodesSize(), 1); | |||||
| EXPECT_EQ(merged_graph->GetDirectNode().at(0)->GetName(), "while"); | |||||
| // two sub_sub_graphs: while cond & while body, their parent graph is "MergedGraph" after unfold | |||||
| EXPECT_EQ(sub_sub_graph1->GetParentGraph()->GetName(), "MergedGraph" ); | |||||
| EXPECT_EQ(sub_sub_graph1->GetParentGraph()->GetName(), "MergedGraph"); | |||||
| // node "cond_data" & "body_data" has owner compute graph "MergedGraph" before unfold | |||||
| EXPECT_EQ(sub_graph_while_node->GetOwnerComputeGraph()->GetName(), "MergedGraph"); | |||||
| } | |||||