| @@ -49,18 +49,6 @@ static int64_t Lcm(int64_t a, int64_t b) { | |||||
| int64_t temp = (a * b) / (Measure(a, b)); | int64_t temp = (a * b) / (Measure(a, b)); | ||||
| return temp; | return temp; | ||||
| } | } | ||||
| // get the result of two number divisor and let result round up | |||||
| static int64_t DivCeil(int64_t a, int64_t b) { | |||||
| if (b == 0) { | |||||
| return -1; | |||||
| } else { | |||||
| int64_t ret = a / b; | |||||
| if ((a % b) != 0) { | |||||
| ret++; | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_type) > 0 ? SUCCESS : UNSUPPORTED; } | Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_type) > 0 ? SUCCESS : UNSUPPORTED; } | ||||
| @@ -94,22 +82,22 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_ | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TransShapeToFzWithGroups(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape | |||||
| , int64_t groups) { | |||||
| Status TransShapeToFzWithGroups(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape, | |||||
| int64_t groups) { | |||||
| auto c0 = GetCubeSizeByDataType(data_type); | auto c0 = GetCubeSizeByDataType(data_type); | ||||
| if (c0 < 0) { | if (c0 < 0) { | ||||
| return ACL_ERROR_GE_DATATYPE_INVALID; | return ACL_ERROR_GE_DATATYPE_INVALID; | ||||
| } | } | ||||
| int64_t cin_ori = c; | int64_t cin_ori = c; | ||||
| int64_t cout_ori = n / groups; | int64_t cout_ori = n / groups; | ||||
| int64_t cube_k = data_type == DT_INT8 ? 32 : 16; | |||||
| int64_t cube_k = GetCubeSizeByDataType(data_type); | |||||
| int64_t e_mult = std::min( | int64_t e_mult = std::min( | ||||
| Lcm(Lcm(cin_ori, cube_k) / (cin_ori), Lcm(cout_ori, kCubeN) / (cout_ori)), | Lcm(Lcm(cin_ori, cube_k) / (cin_ori), Lcm(cout_ori, kCubeN) / (cout_ori)), | ||||
| groups); | groups); | ||||
| int64_t cin_opt = DivCeil(e_mult * cin_ori, cube_k) * cube_k; | |||||
| int64_t cin_opt = Ceil(e_mult * cin_ori, cube_k) * cube_k; | |||||
| int64_t c1_dim = cin_opt / cube_k; | int64_t c1_dim = cin_opt / cube_k; | ||||
| int64_t g_dim = DivCeil(groups, e_mult); | |||||
| auto n1 = DivCeil(cout_ori * e_mult, kCubeN); | |||||
| int64_t g_dim = Ceil(groups, e_mult); | |||||
| auto n1 = Ceil(cout_ori * e_mult, kCubeN); | |||||
| dst_shape.clear(); | dst_shape.clear(); | ||||
| dst_shape.push_back(g_dim * c1_dim * h * w); | dst_shape.push_back(g_dim * c1_dim * h * w); | ||||
| dst_shape.push_back(n1); | dst_shape.push_back(n1); | ||||
| @@ -274,24 +262,21 @@ Status TransFormatHwcnToFzWithGroups(const TransArgs &args, TransResult &result, | |||||
| int64_t cin_ori = c_dim; | int64_t cin_ori = c_dim; | ||||
| int64_t cout_ori = n_dim / groups; | int64_t cout_ori = n_dim / groups; | ||||
| if (cin_ori == 0 || cout_ori == 0) { | if (cin_ori == 0 || cout_ori == 0) { | ||||
| GELOGE(GRAPH_FAILED, | |||||
| "Cin_ori, cout_ori must not be equal 0, " | |||||
| "and current cin_ori, cout_ori, groups are %ld %ld %ld", | |||||
| cin_ori, cout_ori, groups); | |||||
| GELOGE(GRAPH_FAILED, "Cin_ori, cout_ori must not be equal 0, and current cin_ori, cout_ori," | |||||
| "groups are %ld %ld %ld",cin_ori, cout_ori, groups); | |||||
| return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
| } | } | ||||
| const int64_t cube_k = args.src_data_type == DT_INT8 ? 32 : 16; | |||||
| const int64_t cube_k = GetCubeSizeByDataType(data_type); | |||||
| int64_t e_mult = std::min( | int64_t e_mult = std::min( | ||||
| Lcm(Lcm(cin_ori, cube_k) / (cin_ori), Lcm(cout_ori, kCubeN) / (cout_ori)), | Lcm(Lcm(cin_ori, cube_k) / (cin_ori), Lcm(cout_ori, kCubeN) / (cout_ori)), | ||||
| groups); | groups); | ||||
| int64_t cin_opt = DivCeil(e_mult * cin_ori, cube_k) * cube_k; | |||||
| int64_t cout_opt = DivCeil(e_mult * cout_ori, kCubeN) * kCubeN; | |||||
| int64_t cin_opt = Ceil(e_mult * cin_ori, cube_k) * cube_k; | |||||
| int64_t cout_opt = Ceil(e_mult * cout_ori, kCubeN) * kCubeN; | |||||
| int64_t c1_dim = cin_opt / cube_k; | int64_t c1_dim = cin_opt / cube_k; | ||||
| int64_t g_dim = DivCeil(groups, e_mult); | |||||
| int64_t g_dim = Ceil(groups, e_mult); | |||||
| int64_t dim_cin = cin_opt / cube_k; | int64_t dim_cin = cin_opt / cube_k; | ||||
| int64_t data_size = GetSizeByDataType(args.src_data_type); | int64_t data_size = GetSizeByDataType(args.src_data_type); | ||||
| int64_t size_output_data = | |||||
| g_dim * kDim * dim_cin * h_dim * w_dim * cout_opt * cube_k * data_size; | |||||
| int64_t size_output_data = g_dim * kDim * dim_cin * h_dim * w_dim * cout_opt * cube_k * data_size; | |||||
| GE_CHK_BOOL_EXEC_NOLOG(size_output_data != 0, result.length = static_cast<size_t>(size_output_data); | GE_CHK_BOOL_EXEC_NOLOG(size_output_data != 0, result.length = static_cast<size_t>(size_output_data); | ||||
| return SUCCESS;); | return SUCCESS;); | ||||
| errno_t ret = EOK; | errno_t ret = EOK; | ||||
| @@ -302,7 +287,7 @@ Status TransFormatHwcnToFzWithGroups(const TransArgs &args, TransResult &result, | |||||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | TypeUtils::FormatToSerialString(args.src_format).c_str(), | ||||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), size_output_data); | TypeUtils::FormatToSerialString(args.dst_format).c_str(), size_output_data); | ||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION;); | return ACL_ERROR_GE_MEMORY_ALLOCATION;); | ||||
| ret = memset_s(dst.get(), size_output_data, 0, size_output_data); | |||||
| ret = memset_s(dst.get(), static_cast<size_t>(size_output_data), 0, static_cast<size_t>(size_output_data)); | |||||
| if (ret != EOK) { | if (ret != EOK) { | ||||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory, ret is %d", ret); | GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory, ret is %d", ret); | ||||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | ||||