| @@ -49,18 +49,6 @@ static int64_t Lcm(int64_t a, int64_t b) { | |||
| int64_t temp = (a * b) / (Measure(a, b)); | |||
| return temp; | |||
| } | |||
// Integer division of a by b with the quotient rounded up (towards positive infinity).
//
// @param a dividend
// @param b divisor
// @return ceil(a / b), or -1 when b is 0 (sentinel kept so existing callers see the same value).
//
// C++ integer division truncates towards zero, so the truncated quotient is already the ceiling
// whenever the exact quotient is negative. The quotient is therefore bumped only when there is a
// remainder AND both operands have the same sign (i.e. the exact quotient is positive).
// NOTE: a == INT64_MIN with b == -1 is not representable and is not handled here.
static int64_t DivCeil(int64_t a, int64_t b) {
  if (b == 0) {
    return -1;
  }
  int64_t ret = a / b;
  const bool has_remainder = (a % b) != 0;
  const bool same_sign = (a < 0) == (b < 0);
  if (has_remainder && same_sign) {
    ++ret;
  }
  return ret;
}
| Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_type) > 0 ? SUCCESS : UNSUPPORTED; } | |||
| @@ -94,22 +82,22 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_ | |||
| return SUCCESS; | |||
| } | |||
| Status TransShapeToFzWithGroups(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape | |||
| , int64_t groups) { | |||
| Status TransShapeToFzWithGroups(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape, | |||
| int64_t groups) { | |||
| auto c0 = GetCubeSizeByDataType(data_type); | |||
| if (c0 < 0) { | |||
| return ACL_ERROR_GE_DATATYPE_INVALID; | |||
| } | |||
| int64_t cin_ori = c; | |||
| int64_t cout_ori = n / groups; | |||
| int64_t cube_k = data_type == DT_INT8 ? 32 : 16; | |||
| int64_t cube_k = GetCubeSizeByDataType(data_type); | |||
| int64_t e_mult = std::min( | |||
| Lcm(Lcm(cin_ori, cube_k) / (cin_ori), Lcm(cout_ori, kCubeN) / (cout_ori)), | |||
| groups); | |||
| int64_t cin_opt = DivCeil(e_mult * cin_ori, cube_k) * cube_k; | |||
| int64_t cin_opt = Ceil(e_mult * cin_ori, cube_k) * cube_k; | |||
| int64_t c1_dim = cin_opt / cube_k; | |||
| int64_t g_dim = DivCeil(groups, e_mult); | |||
| auto n1 = DivCeil(cout_ori * e_mult, kCubeN); | |||
| int64_t g_dim = Ceil(groups, e_mult); | |||
| auto n1 = Ceil(cout_ori * e_mult, kCubeN); | |||
| dst_shape.clear(); | |||
| dst_shape.push_back(g_dim * c1_dim * h * w); | |||
| dst_shape.push_back(n1); | |||
| @@ -274,24 +262,21 @@ Status TransFormatHwcnToFzWithGroups(const TransArgs &args, TransResult &result, | |||
| int64_t cin_ori = c_dim; | |||
| int64_t cout_ori = n_dim / groups; | |||
| if (cin_ori == 0 || cout_ori == 0) { | |||
| GELOGE(GRAPH_FAILED, | |||
| "Cin_ori, cout_ori must not be equal 0, " | |||
| "and current cin_ori, cout_ori, groups are %ld %ld %ld", | |||
| cin_ori, cout_ori, groups); | |||
| GELOGE(GRAPH_FAILED, "Cin_ori, cout_ori must not be equal 0, and current cin_ori, cout_ori," | |||
| "groups are %ld %ld %ld",cin_ori, cout_ori, groups); | |||
| return GRAPH_FAILED; | |||
| } | |||
| const int64_t cube_k = args.src_data_type == DT_INT8 ? 32 : 16; | |||
| const int64_t cube_k = GetCubeSizeByDataType(data_type); | |||
| int64_t e_mult = std::min( | |||
| Lcm(Lcm(cin_ori, cube_k) / (cin_ori), Lcm(cout_ori, kCubeN) / (cout_ori)), | |||
| groups); | |||
| int64_t cin_opt = DivCeil(e_mult * cin_ori, cube_k) * cube_k; | |||
| int64_t cout_opt = DivCeil(e_mult * cout_ori, kCubeN) * kCubeN; | |||
| int64_t cin_opt = Ceil(e_mult * cin_ori, cube_k) * cube_k; | |||
| int64_t cout_opt = Ceil(e_mult * cout_ori, kCubeN) * kCubeN; | |||
| int64_t c1_dim = cin_opt / cube_k; | |||
| int64_t g_dim = DivCeil(groups, e_mult); | |||
| int64_t g_dim = Ceil(groups, e_mult); | |||
| int64_t dim_cin = cin_opt / cube_k; | |||
| int64_t data_size = GetSizeByDataType(args.src_data_type); | |||
| int64_t size_output_data = | |||
| g_dim * kDim * dim_cin * h_dim * w_dim * cout_opt * cube_k * data_size; | |||
| int64_t size_output_data = g_dim * kDim * dim_cin * h_dim * w_dim * cout_opt * cube_k * data_size; | |||
| GE_CHK_BOOL_EXEC_NOLOG(size_output_data != 0, result.length = static_cast<size_t>(size_output_data); | |||
| return SUCCESS;); | |||
| errno_t ret = EOK; | |||
| @@ -302,7 +287,7 @@ Status TransFormatHwcnToFzWithGroups(const TransArgs &args, TransResult &result, | |||
| TypeUtils::FormatToSerialString(args.src_format).c_str(), | |||
| TypeUtils::FormatToSerialString(args.dst_format).c_str(), size_output_data); | |||
| return ACL_ERROR_GE_MEMORY_ALLOCATION;); | |||
| ret = memset_s(dst.get(), size_output_data, 0, size_output_data); | |||
| ret = memset_s(dst.get(), static_cast<size_t>(size_output_data), 0, static_cast<size_t>(size_output_data)); | |||
| if (ret != EOK) { | |||
| GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory, ret is %d", ret); | |||
| return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||