|
|
@@ -1,576 +0,0 @@ |
|
|
|
/** |
|
|
|
* Copyright 2019-2020 Huawei Technologies Co., Ltd |
|
|
|
* |
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
* you may not use this file except in compliance with the License. |
|
|
|
* You may obtain a copy of the License at |
|
|
|
* |
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
|
|
* |
|
|
|
* Unless required by applicable law or agreed to in writing, software |
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
|
|
* See the License for the specific language governing permissions and |
|
|
|
* limitations under the License. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include <vector> |
|
|
|
#include <cce/cce.h> |
|
|
|
#include <cce/dnn.h> |
|
|
|
#include <cce/compiler_stub.h> |
|
|
|
#include <cce/taskdown_api.h> |
|
|
|
|
|
|
|
#include "cce/optimizer/fusion_engine.h" |
|
|
|
#include "common/op/attr_value_util.h" |
|
|
|
#include "graph/utils/tensor_utils.h" |
|
|
|
#include "graph/utils/graph_utils.h" |
|
|
|
|
|
|
|
using namespace cce; |
|
|
|
using namespace std; |
|
|
|
using namespace ge; |
|
|
|
using namespace fusion; |
|
|
|
|
|
|
|
// Base value from which ccGetKernelArgsAddrs() derives the addresses it reports; starts at 0.
uint64_t global_mem_base{0};
|
|
|
|
|
|
|
namespace cce { |
|
|
|
// Capacity of the dim_buf / stride_buf arrays in tagCcTensor.
#define DIM_MAX_SIZE 8

// NOTE(review): not referenced in the visible part of this file — confirm whether it is still needed.
static const uint32_t C0 = 16U;
|
|
|
// Empty stand-in definitions for descriptor tag types. None of them carries any state in this stub.
struct tagCcPad {};
struct tagCcConvolution {};

struct tagCcLRN {};

struct tagCcFasterRcnnProposal {};
struct tagCcRoiAlign {};
struct tagCcBatchNorm {};
struct tagCcDetectpostprocess {};

struct tagCcSsdDetectionOutput {};

struct tagCcRefinedetDetectionOutput {};

struct tagCcMsrGenerateRpnProposals {};
|
|
|
|
|
|
|
// Filter descriptor used by the stub: just the list of dimension extents that the
// ccSet*Filter*Descriptor() functions store and ccGetFilterSizeInBytes() reads back.
struct tagCcFilter {
  std::vector<uint32_t> dims;
};
|
|
|
|
|
|
|
struct tagCcTensor { |
|
|
|
ccTensorFormat_t format; |
|
|
|
ccDataType_t data_type; |
|
|
|
uint32_t dim_cnt; |
|
|
|
int32_t real_dim_cnt; |
|
|
|
uint32_t data_size; |
|
|
|
int32_t dim_buf[DIM_MAX_SIZE]; |
|
|
|
int32_t stride_buf[DIM_MAX_SIZE]; |
|
|
|
}; |
|
|
|
|
|
|
|
typedef struct tagCcPooling { |
|
|
|
ccPoolingMode_t mode; |
|
|
|
ccPaddingMode_t pad_mode; |
|
|
|
ccNanPropagation_t max_pooling_nan_opt; |
|
|
|
uint32_t dim_cnt; |
|
|
|
int32_t window_dim[6]; |
|
|
|
int32_t padding[6]; |
|
|
|
int32_t stride[6]; |
|
|
|
} ccPooling_t; |
|
|
|
|
|
|
|
// Further empty stand-in definitions for descriptor tag types; none of them carries state in this stub.
struct tagCcActivation {};

struct tagCcFasterRcnnDetectionOutput {};
struct tagCcSpatialTransformer {};

struct tagCcPower {};
struct tagCcResizeBilinear {};
struct tagCcSsdNormalize {};
struct tagCcSsdPostProcessor {};
struct tagCcSsdPriorBox {};
struct tagCcPsRoiPooling {};

struct tagMsrFastRcnnPredictions {};
struct tagCcPRelu {};
struct tagCcStridedSlice {};

struct tagCcStridedSliceAttrs {};

struct tagCcRnn {};

struct tagCcArgmaxmin {};
|
|
|
|
|
|
|
typedef struct tagCcLog { |
|
|
|
ccDataType_t data_type; |
|
|
|
uint32_t param_cnt; |
|
|
|
} ccLog_t; |
|
|
|
typedef struct tagCcLog *ccLogDescriptor_t; |
|
|
|
|
|
|
|
// Empty stand-in for the PadV2 descriptor; the PadV2 stubs in this file never allocate or read one.
struct tagCcPadV2 {
};
|
|
|
|
|
|
|
/// @brief Stub: reports the fixed 4-D shape {1, 2, 2, 3}, independent of the descriptors passed in.
/// @param [in] x_desc    input tensor descriptor (ignored)
/// @param [in] pad_desc  pad descriptor (ignored)
/// @param [out] dim_cnt  receives the number of dimensions written (4)
/// @param [out] dim      receives the dimensions; needs room for at least 4 entries
/// @param [in] dim_len   capacity of dim, in elements
/// @return CC_STATUS_BAD_PARAM if dim_cnt or dim is null or dim_len is smaller than 4,
///         otherwise CC_STATUS_SUCCESS
ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t x_desc, const ccPadV2Descriptor_t pad_desc, int32_t *dim_cnt,
                               int32_t dim[], int32_t dim_len) {
  static const int32_t kStubDimCnt = 4;
  static const int32_t kStubDims[kStubDimCnt] = {1, 2, 2, 3};

  // Refuse to write through null pointers or past the end of the caller's buffer.
  if (dim_cnt == nullptr || dim == nullptr || dim_len < kStubDimCnt) {
    return CC_STATUS_BAD_PARAM;
  }

  *dim_cnt = kStubDimCnt;
  for (int32_t i = 0; i < kStubDimCnt; ++i) {
    dim[i] = kStubDims[i];
  }
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: every argument is ignored and output is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccPadV2Forward(ccHandle_t handle, const ccPadV2Descriptor_t pad_desc, const void *alpha,
                          const ccTensorDescriptor_t x_desc, const void *x, const void *beta,
                          const ccTensorDescriptor_t output_desc, void *output) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: nothing is allocated and *pad_desc is left untouched.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *pad_desc) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: nothing is released and *pad_desc is left untouched.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *pad_desc) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: the handle is ignored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetKernelOpMap(ccHandle_t handle) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: the buffer is neither read nor dumped.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t buf_len,
                             const uint32_t task_index) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: none of the pad settings is stored in pad_desc.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t pad_desc, const int32_t pad_shape_cnt,
                                const int32_t pad_shape_low[], const int32_t pad_shape_high[],
                                const ccPadMode_t pad_mode, const void *pad_value,
                                const ccDataType_t pad_value_type) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
struct tagCcYoloDetectionOutput { |
|
|
|
ccYoloVersion_t yolo_version; |
|
|
|
uint32_t net_h; |
|
|
|
uint32_t net_w; |
|
|
|
uint32_t post_top_k; |
|
|
|
uint32_t classes; |
|
|
|
float nms_threshold; |
|
|
|
float iou_thre_decay; |
|
|
|
float coor_scale_factor; |
|
|
|
bool relative; |
|
|
|
float obj_threshold; |
|
|
|
float cls_threshold; |
|
|
|
uint32_t bias_num; |
|
|
|
float *bias; |
|
|
|
}; |
|
|
|
|
|
|
|
// Empty stand-in definitions for descriptor tag types. tagCcPow is the type allocated by
// ccCreatePowDescriptor(); the others are not instantiated in the visible part of this file.
struct tagCcYoloRegion {};

struct tagCcEltwise {};

struct tagCcHashTableLookup {};

struct tagCcEmbeddingAttnDecoder {};
struct tagNonMaxSuppression {};

struct tagCcArcSinCos {};
struct tagCcPow {};
struct tagCcConcatFive2Four_t {};
struct tagCcConcatFour2Five_t {};
|
|
|
|
|
|
|
/// @brief Allocates an empty pow descriptor and hands it to the caller.
/// @param [out] pow_desc  receives the new descriptor; release it with ccDestroyPowDescriptor()
/// @return CC_STATUS_BAD_PARAM if pow_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccCreatePowDescriptor(ccPowDescriptor_t *pow_desc) {
  // Same guard as ccDestroyPowDescriptor(): never write through a null out-pointer.
  if (pow_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }
  *pow_desc = new tagCcPow();
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: data_type and param_cnt are not stored in pow_desc.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t pow_desc, ccDataType_t data_type, uint32_t param_cnt) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Frees the pow descriptor and resets the caller's handle to null.
/// @param [in|out] pow_desc  address of the handle to release
/// @return CC_STATUS_BAD_PARAM if pow_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccDestroyPowDescriptor(ccPowDescriptor_t *pow_desc) {
  if (pow_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  // delete on a null handle is harmless, so *pow_desc itself needs no check.
  delete *pow_desc;
  *pow_desc = nullptr;
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: every argument is ignored and z is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t pow_desc, const void *pow_param,
                        const void *alpha, const ccTensorDescriptor_t x_desc, const void *x,
                        const ccTensorDescriptor_t y_desc, const void *y, const void *beta,
                        const ccTensorDescriptor_t z_desc, void *z) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: every argument is ignored and output is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc,
                              const void *x, const ccTensorDescriptor_t y_desc, const void *y, const void *beta,
                              const ccTensorDescriptor_t output_desc, void *output) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: every argument is ignored and output is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compare_type, const void *alpha,
                            const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc,
                            const void *y, const void *beta, const ccTensorDescriptor_t output_desc,
                            void *output) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Stub: reports the fixed 4-D shape {1, 1, 1, 1}, independent of the descriptors passed in.
/// @param [in] x_desc    first input descriptor (ignored)
/// @param [in] y_desc    second input descriptor (ignored)
/// @param [out] dim_cnt  receives the number of dimensions written (4)
/// @param [out] dim      receives the dimensions; needs room for at least 4 entries
/// @param [in] dim_len   capacity of dim, in elements
/// @return CC_STATUS_BAD_PARAM if dim_cnt or dim is null or dim_len is smaller than 4,
///         otherwise CC_STATUS_SUCCESS
ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t x_desc, const ccTensorDescriptor_t y_desc, int32_t *dim_cnt,
                                 int32_t *dim, int32_t dim_len) {
  static const int32_t kStubDimCnt = 4;

  // Refuse to write through null pointers or past the end of the caller's buffer.
  if (dim_cnt == nullptr || dim == nullptr || dim_len < kStubDimCnt) {
    return CC_STATUS_BAD_PARAM;
  }

  *dim_cnt = kStubDimCnt;
  for (int32_t i = 0; i < kStubDimCnt; ++i) {
    dim[i] = 1;
  }
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: every argument is ignored and y is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccArcTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc,
                           const void *x, const void *beta, const ccTensorDescriptor_t y_desc, void *y) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: every argument is ignored and y is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccAtanhForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t x_desc,
                          const void *x, const void *beta, const ccTensorDescriptor_t y_desc, void *y) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Stub: unconditionally answers "high performance" without inspecting any shape or type argument.
/// @param [out] is_high_performance  always set to true
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccIsDepthwiseHighPerformance(int32_t input_n, int32_t input_c, int32_t input_h, int32_t input_w,
                                        int32_t filter_n, int32_t filter_c, int32_t filter_h, int32_t filter_w,
                                        int32_t dilation_h, int32_t dilation_w, int32_t pad_h_head,
                                        int32_t pad_h_tail, int32_t pad_w_head, int32_t pad_w_tail,
                                        int32_t stride_h, int32_t stride_w, int32_t group_num,
                                        bool &is_high_performance, bool is_quant, ccDataType_t input_data_type,
                                        ccDataType_t output_data_type) {
  is_high_performance = true;

  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
// Empty stand-in definitions for descriptor tag types; none of them carries state in this stub.
struct tagCcSpaceToBatch {};

struct tagCcBatchToSpace {};

struct tagCcResizeNearestNeighbor {};
|
|
|
|
|
|
|
/// @brief No-op stub: *stream_id is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccGetStream(ccHandle_t handle, rtStream_t *stream_id) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: *count is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccGetRtVersion(uint32_t *count) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Frees the tensor descriptor and resets the caller's handle to null.
/// @param [in|out] tensor_desc  address of the handle to release
/// @return CC_STATUS_BAD_PARAM if tensor_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccDestroyTensorDescriptor(ccTensorDescriptor_t *tensor_desc) {
  if (tensor_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  // delete on a null handle is harmless, so *tensor_desc itself needs no check.
  delete *tensor_desc;
  *tensor_desc = nullptr;
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief Frees the filter descriptor and resets the caller's handle to null.
/// @param [in|out] filter_desc  address of the handle to release
/// @return CC_STATUS_BAD_PARAM if filter_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccDestroyFilterDescriptor(ccFilterDescriptor_t *filter_desc) {
  // Same guard as the tensor and pow destroy functions: never dereference a null handle address.
  if (filter_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  delete *filter_desc;
  *filter_desc = nullptr;
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Computes dims[0] * dims[1] * dims[2] * dims[3] * sizeof(float) for a filter descriptor.
/// @param [in] filter_desc  descriptor whose first four dimensions are used
/// @param [out] size        receives the byte count
/// @return CC_STATUS_BAD_PARAM if either pointer is null or the descriptor holds fewer than four
///         dimensions, otherwise CC_STATUS_SUCCESS
ccStatus_t ccGetFilterSizeInBytes(const ccFilterDescriptor_t filter_desc, uint32_t *size) {
  if (filter_desc == nullptr || size == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  // dims stays empty until one of the ccSet*Filter*Descriptor() functions has run; indexing it
  // before that would read out of bounds.
  const std::vector<uint32_t> &dims = filter_desc->dims;
  if (dims.size() < 4) {
    return CC_STATUS_BAD_PARAM;
  }

  // The element size is fixed to sizeof(float); no data type is stored in the descriptor.
  *size = dims[0] * dims[1] * dims[2] * dims[3] * sizeof(float);
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: no filter data is transformed or copied.
///
/// y is received by value, so re-pointing it inside this function would never be visible to the
/// caller; the buffer behind y is left untouched.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccTransFilter(const ccFilterDescriptor_t w_desc, const void *w, ccFilterDescriptor_t y_desc, void *y,
                         uint32_t y_size_in_bytes) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Allocates a value-initialized tensor descriptor and hands it to the caller.
/// @param [out] tensor_desc  receives the new descriptor; release it with ccDestroyTensorDescriptor()
/// @return CC_STATUS_BAD_PARAM if tensor_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccCreateTensorDescriptor(ccTensorDescriptor_t *tensor_desc) {
  // Same guard as ccDestroyTensorDescriptor(): never write through a null out-pointer.
  if (tensor_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }
  *tensor_desc = new tagCcTensor();
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Fills a tensor descriptor with a 4-D shape.
///
/// The dimensions are stored as {n, h, w, c} for CC_TENSOR_NHWC and as {n, c, h, w} for every other
/// format. real_dim_cnt and stride_buf are not touched.
/// @param [in|out] tensor_desc  descriptor to fill
/// @param [in] format           layout tag, stored as-is
/// @param [in] data_type        element type tag, stored as-is
/// @param [in] n, c, h, w       extents
/// @return CC_STATUS_BAD_PARAM if tensor_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccSetTensor4dDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format, ccDataType_t data_type,
                                   int32_t n, int32_t c, int32_t h, int32_t w) {
  if (tensor_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  const bool is_nhwc = (format == CC_TENSOR_NHWC);
  tensor_desc->dim_buf[0] = n;
  tensor_desc->dim_buf[1] = is_nhwc ? h : c;
  tensor_desc->dim_buf[2] = is_nhwc ? w : h;
  tensor_desc->dim_buf[3] = is_nhwc ? c : w;

  tensor_desc->dim_cnt = 4;
  tensor_desc->data_type = data_type;
  tensor_desc->format = format;
  // NOTE(review): sizeof(data_type) is the size of the ccDataType_t variable itself, not of one
  // element of the type it names. Kept unchanged because callers may depend on the resulting value.
  tensor_desc->data_size = n * c * h * w * sizeof(data_type);
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief Returns the byte count recorded in the descriptor's data_size field.
/// @param [in] tensor_desc  descriptor to query
/// @param [out] size        receives data_size
/// @return CC_STATUS_BAD_PARAM if either pointer is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccGetTensorSizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) {
  const bool args_ok = (tensor_desc != nullptr) && (size != nullptr);
  if (!args_ok) {
    return CC_STATUS_BAD_PARAM;
  }

  *size = tensor_desc->data_size;
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Returns the byte count recorded in the descriptor's data_size field (same value as
///        ccGetTensorSizeInBytes() reports).
/// @param [in] tensor_desc  descriptor to query
/// @param [out] size        receives data_size
/// @return CC_STATUS_BAD_PARAM if either pointer is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccGetTensorMemorySizeInBytes(const ccTensorDescriptor_t tensor_desc, uint32_t *size) {
  // Mirror the argument validation done by ccGetTensorSizeInBytes().
  if (tensor_desc == nullptr || size == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  *size = tensor_desc->data_size;
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Allocates a filter descriptor with an empty dimension list and hands it to the caller.
/// @param [out] filter_desc  receives the new descriptor; release it with ccDestroyFilterDescriptor()
/// @return CC_STATUS_BAD_PARAM if filter_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccCreateFilterDescriptor(ccFilterDescriptor_t *filter_desc) {
  // Never write through a null out-pointer.
  if (filter_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }
  *filter_desc = new tagCcFilter();
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Stores the 4-D filter shape {k, c, h, w} in the descriptor.
///
/// The dimension list is replaced, not appended to, so setting a descriptor more than once leaves
/// exactly four entries and ccGetFilterSizeInBytes() sees the latest shape.
/// @param [in|out] filter_desc  descriptor to fill
/// @param [in] format           ignored by the stub
/// @param [in] data_type        ignored by the stub
/// @param [in] k, c, h, w       extents, converted to uint32_t
/// @return CC_STATUS_BAD_PARAM if filter_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccSetFilter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format, ccDataType_t data_type,
                                   int32_t k, int32_t c, int32_t h, int32_t w) {
  if (filter_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  filter_desc->dims = {static_cast<uint32_t>(k), static_cast<uint32_t>(c), static_cast<uint32_t>(h),
                       static_cast<uint32_t>(w)};
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Stores the filter shape {k, c, h, w} in the descriptor.
///
/// The dimension list is replaced, not appended to, so setting a descriptor more than once leaves
/// exactly four entries and ccGetFilterSizeInBytes() sees the latest shape.
/// @param [in|out] filter_desc  descriptor to fill
/// @param [in] format           ignored by the stub
/// @param [in] data_type        ignored by the stub
/// @param [in] k, c, h, w       extents, converted to uint32_t
/// @return CC_STATUS_BAD_PARAM if filter_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccSetFilterFractalDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format,
                                        ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w) {
  if (filter_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  filter_desc->dims = {static_cast<uint32_t>(k), static_cast<uint32_t>(c), static_cast<uint32_t>(h),
                       static_cast<uint32_t>(w)};
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: the stream is not recorded on the handle.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetStream(ccHandle_t handle, rtStream_t stream_id) {
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief Allocates a value-initialized tensor descriptor to act as a pooling-mask descriptor.
/// @param [out] pooling_mask_desc  receives the new descriptor
/// @return CC_STATUS_BAD_PARAM if pooling_mask_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *pooling_mask_desc) {
  // Never write through a null out-pointer.
  if (pooling_mask_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }
  *pooling_mask_desc = new tagCcTensor();
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: none of the arguments is stored in tensor_desc.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t tensor_desc, ccTensorFormat_t format,
                                            ccDataType_t data_type, int32_t n, int32_t c, int32_t h,
                                            int32_t w, int32_t window_h, int32_t window_w) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: none of the arguments is stored in filter_desc.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetFilter6dDescriptor(ccTensorDescriptor_t filter_desc, ccTensorFormat_t format,
                                   ccDataType_t data_type, int32_t c1, int32_t h, int32_t w, int32_t n,
                                   int32_t co, int32_t c0) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @ingroup dnn |
|
|
|
/// @brief get the format and dimcnt of GeTensor |
|
|
|
/// @param [in] tensor_desc descriptor of tensor |
|
|
|
/// @param [in|out] format point to format |
|
|
|
/// @return ccStatus_t |
|
|
|
ccStatus_t ccGetTensorFormat(const ccTensorDescriptor_t tensor_desc, ccTensorFormat_t *format) { |
|
|
|
*format = tensor_desc->format; |
|
|
|
return CC_STATUS_SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// @brief No-op stub: no tensor data is transformed and y is not written.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccTransTensor(const ccTensorDescriptor_t x_desc, const void *x, const ccTensorDescriptor_t y_desc,
                         void *y, uint32_t y_size_in_bytes) {
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief No-op stub: nothing is initialized.
void cceSysInit() {
}
|
|
|
|
|
|
|
/// @brief No-op stub.
/// @return true always
bool compilerStubFree() {
  return true;
}
|
|
|
|
|
|
|
/// @brief No-op stub.
/// @return true always
bool compilerStubInit() {
  return true;
}
|
|
|
|
|
|
|
/// @brief Stores the 4-D filter shape {k, c, h, w} in the descriptor.
///
/// The dimension list is replaced, not appended to, so setting a descriptor more than once leaves
/// exactly four entries and ccGetFilterSizeInBytes() sees the latest shape.
/// @param [in|out] filter_desc    descriptor to fill
/// @param [in] format             ignored by the stub
/// @param [in] data_type          ignored by the stub
/// @param [in] k, c, h, w         extents, converted to uint32_t
/// @param [in] output_data_type   ignored by the stub
/// @return CC_STATUS_BAD_PARAM if filter_desc is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccSetInt8Filter4dDescriptor(ccFilterDescriptor_t filter_desc, ccTensorFormat_t format,
                                       ccDataType_t data_type, int32_t k, int32_t c, int32_t h, int32_t w,
                                       ccDataType_t output_data_type) {
  if (filter_desc == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  filter_desc->dims = {static_cast<uint32_t>(k), static_cast<uint32_t>(c), static_cast<uint32_t>(h),
                       static_cast<uint32_t>(w)};
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief Marks a tensor descriptor as CC_TENSOR_ND and records its data type and byte size.
///
/// data_size becomes sizeof(data_type) multiplied by the first dim_cnt entries of dimA. The
/// dimensions themselves are not copied into the descriptor, and dim_cnt is not stored.
/// @param [in|out] tensor_desc  descriptor to fill
/// @param [in] data_type        element type tag, stored as-is
/// @param [in] dim_cnt          number of entries read from dimA
/// @param [in] dimA             extents; may be null only when dim_cnt <= 0
/// @return CC_STATUS_BAD_PARAM if tensor_desc is null, or dimA is null while dim_cnt > 0;
///         otherwise CC_STATUS_SUCCESS
ccStatus_t ccSetTensorNdDescriptor(ccTensorDescriptor_t tensor_desc, ccDataType_t data_type, int32_t dim_cnt,
                                   int32_t dimA[]) {
  if (tensor_desc == nullptr || (dim_cnt > 0 && dimA == nullptr)) {
    return CC_STATUS_BAD_PARAM;
  }

  // NOTE(review): sizeof(data_type) is the size of the ccDataType_t variable itself, not of one
  // element of the type it names. Kept unchanged because callers may depend on the resulting value.
  uint32_t total_bytes = sizeof(data_type);
  for (int32_t i = 0; i < dim_cnt; ++i) {
    total_bytes = total_bytes * dimA[i];
  }

  tensor_desc->data_type = data_type;
  tensor_desc->data_size = total_bytes;
  tensor_desc->format = CC_TENSOR_ND;
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: no profiling configuration is applied.
/// @return CC_STATUS_SUCCESS always
ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t flag) {
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief Stores real_dim_cnt in the descriptor.
/// @param [in|out] tensor_desc  descriptor to update; a null descriptor is silently skipped
/// @param [in] real_dim_cnt     value to store
/// @return CC_STATUS_SUCCESS always, including when tensor_desc is null
ccStatus_t ccSetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t real_dim_cnt) {
  // A single null test suffices; NULL and nullptr compare identically against a pointer.
  if (tensor_desc != nullptr) {
    tensor_desc->real_dim_cnt = real_dim_cnt;
  }
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Reads back the real_dim_cnt stored in the descriptor.
/// @param [in] tensor_desc    descriptor to query
/// @param [out] real_dim_cnt  receives the stored value
/// @return CC_STATUS_BAD_PARAM if either pointer is null, otherwise CC_STATUS_SUCCESS
ccStatus_t ccGetTensorRealDimCnt(ccTensorDescriptor_t tensor_desc, int32_t *real_dim_cnt) {
  if (tensor_desc == nullptr || real_dim_cnt == nullptr) {
    return CC_STATUS_BAD_PARAM;
  }

  *real_dim_cnt = tensor_desc->real_dim_cnt;
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: none of the quantize factors is stored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
                                const uint16_t *scale, const uint16_t *offset, const uint8_t *offset_pad) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: none of the re-quantize factors is stored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
                                  const uint16_t *scale_rq, const uint16_t *next_layer_offset,
                                  const int32_t *offset_w) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: none of the de-quantize factors is stored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantize_info, ccScaleValueMode_t scale_val_mode,
                                  const uint16_t *scale_dq, const int32_t *offset_w) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: the algorithm, scale type and relu flag are not stored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantize_info, ccQuantizeAlgo_t quant_algo,
                                         ccScaleType_t scale_type, bool relu_flag) {
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief No-op stub: nothing is printed.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccPrintTimeStat() {
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief No-op stub: the model id is not recorded on the handle.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetModelId(ccHandle_t handle, uint32_t model_id) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Stub: fabricates a kernel context whose contents depend only on whether stream_id is null.
///
/// A null stream yields kernelType TE and leaves every other field of op_context untouched. Any
/// other stream yields a CCE_AI_CORE context with opId 1, kernelFuncId 1, isFlowtable true,
/// opCount 1 and opIndex2[0] set to 0.
/// @param [in] stream_id     only compared against nullptr
/// @param [out] op_context   context to fill
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccGetKernelContext(rtStream_t stream_id, ccOpContext &op_context) {
  if (stream_id == nullptr) {
    op_context.kernelType = ccKernelType::TE;
    return CC_STATUS_SUCCESS;
  }

  op_context.kernelType = ccKernelType::CCE_AI_CORE;
  op_context.opId = 1;
  op_context.kernelFuncId = 1;
  op_context.isFlowtable = true;
  op_context.opCount = 1;
  op_context.opIndex2[0] = 0;
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: neither the context nor the argument buffers are modified.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccUpdateKernelArgs(ccOpContext &op_context, uint64_t data_base_addr, uint64_t weight_base_addr,
                              uint64_t variable_base_addr, void *args_addr, uint64_t args_size,
                              void *l2ctrl_addr) {
  return CC_STATUS_SUCCESS;
}
|
|
|
/// @brief Stub: appends two fabricated address records to op_addrs_info.
///
/// The first record carries global_mem_base, the second global_mem_base + 5476352. In each record
/// addrData holds the value and addrPos points at a slot holding the same value.
/// @param [in] op_context       ignored
/// @param [in] args_addr        ignored
/// @param [in] args_size        ignored
/// @param [in] l2ctrl_addr      ignored
/// @param [out] op_addrs_info   receives the two records (appended; existing entries are kept)
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccGetKernelArgsAddrs(ccOpContext &op_context, void *args_addr, uint64_t args_size, void *l2ctrl_addr,
                                std::vector<ccOpAddrsInfo> &op_addrs_info) {
  // Distance of the fabricated output address from global_mem_base.
  static const uint64_t kStubOutputOffset = 5476352;

  // The pointers stored in addrPos are handed back to the caller and so outlive this call. They
  // therefore point at slots with static storage duration rather than at stack locals, which would
  // dangle as soon as the function returns. The slots are overwritten on every call and are not
  // synchronized.
  static uint64_t input_slot = 0;
  static uint64_t output_slot = 0;
  input_slot = global_mem_base;
  output_slot = global_mem_base + kStubOutputOffset;

  ccOpAddrsInfo addr_record;

  addr_record.addrPos = &input_slot;
  addr_record.addrData = input_slot;
  op_addrs_info.push_back(addr_record);

  addr_record.addrPos = &output_slot;
  addr_record.addrData = output_slot;
  op_addrs_info.push_back(addr_record);

  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: date_info is neither read nor modified.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccSetKernelArgs(std::vector<ccOpAddrsInfo> &date_info) {
  return CC_STATUS_SUCCESS;
}
|
|
|
} // namespace cce |
|
|
|
// ccFusion no namespace |
|
|
|
ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, CceFusionMemCfg_t mem_cfg) { |
|
|
|
return CC_STATUS_SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
//???ccFusion ????namespace cce?? |
|
|
|
ccStatus_t ccFusionStart(ccHandle_t handle, uint32_t graph_id, uint32_t init_flag, uint32_t addr_change_flag) { |
|
|
|
return CC_STATUS_SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
/// @brief No-op stub: handle and graph_id are ignored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccFusionEnd(ccHandle_t handle, uint32_t graph_id) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: handle and graph_id are ignored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccFusionTaskEnd(ccHandle_t handle, uint32_t graph_id) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: nothing is launched.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccKernelLaunchRepeat(ccHandle_t handle) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: nothing is deleted.
/// @return CC_STATUS_SUCCESS always
ccStatus_t ccKernelDelete(ccHandle_t handle) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: the format is not stored in the tensor descriptor.
/// @return CC_STATUS_SUCCESS always
ccStatus_t cce::ccSetTensorFormat(cce::tagCcTensor *, cce::tagCcTensorFormat) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
namespace fusion { |
|
|
|
uint32_t BufferFusion(std::shared_ptr<ge::ComputeGraph>, std::shared_ptr<ge::ComputeGraph>, bool) { return 0; } |
|
|
|
|
|
|
|
uint32_t BufferFusionTrain(std::shared_ptr<ge::ComputeGraph>, std::shared_ptr<ge::ComputeGraph>) { return 0; } |
|
|
|
|
|
|
|
uint32_t GraphFusionTrain(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) { return 0; } |
|
|
|
} // namespace fusion |
|
|
|
namespace fusion { |
|
|
|
using namespace ge; |
|
|
|
|
|
|
|
/// @brief Stub fusion pass: ignores model_graph and fabricates two nodes in fusion_graph.
///
/// Adds an op named and typed "reduction_nd" and an op named "transdata_1" of type "TransData" to
/// fusion_graph, requests an edge from output 0 of the first node to input 0 of the second, tags
/// the ops with "fusion_scope" 1 and 2, and stores each node as a single-element list under keys
/// 1 and 2 of te_fusion_map.
/// @param [in] model_graph       not used by the stub
/// @param [in|out] fusion_graph  graph that receives the two nodes; dereferenced without a null check
/// @param [out] te_fusion_map    entries 1 and 2 are overwritten
/// @return FUSION_STATUS_SUCCESS always
uint32_t Fusion(ComputeGraphPtr model_graph, ComputeGraphPtr fusion_graph, kScopeNodeMap_t &te_fusion_map) {
  // ---- first op: "reduction_nd" with one input and one output descriptor ----
  OpDescPtr op_def_a = std::make_shared<OpDesc>();
  op_def_a->SetName("reduction_nd");
  op_def_a->SetType("reduction_nd");

  GeTensorDescPtr v_input_desc = std::make_shared<GeTensorDesc>();
  op_def_a->AddInputDesc(*v_input_desc);

  std::vector<int64_t> v_input(1, 0);
  op_def_a->SetInputOffset(v_input);

  // NOTE(review): input_desc is a local copy and is never written back to op_def_a, so the settings
  // below do not reach the op. Left as-is to keep the stub's output unchanged — confirm the intent.
  GeTensorDesc input_desc = op_def_a->GetInputDesc(0);
  input_desc.SetFormat(FORMAT_NCHW);
  input_desc.SetDataType(DT_FLOAT);
  input_desc.SetShape(GeShape({1, 3, 5, 5}));
  ge::TensorUtils::SetSize(input_desc, 192);
  ge::TensorUtils::SetRealDimCnt(input_desc, 4);

  // NOTE(review): output_desc is configured only after it has been passed to AddOutputDesc();
  // presumably the op keeps its own copy, in which case these settings do not reach it — confirm.
  GeTensorDescPtr output_desc = std::make_shared<GeTensorDesc>();
  op_def_a->AddOutputDesc(*output_desc);
  output_desc->SetFormat(FORMAT_NCHW);
  output_desc->SetDataType(DT_FLOAT);
  output_desc->SetShape(GeShape({1, 3, 5}));
  ge::TensorUtils::SetSize(*output_desc, 96);
  ge::TensorUtils::SetRealDimCnt(*output_desc, 3);

  // ---- second op: "transdata_1"; no tensor descriptors are attached to it ----
  OpDescPtr op_def_b = std::make_shared<OpDesc>();
  op_def_b->SetName("transdata_1");
  op_def_b->SetType("TransData");

  // ---- wire both ops into the fusion graph ----
  NodePtr node_a = fusion_graph->AddNode(op_def_a);
  NodePtr node_b = fusion_graph->AddNode(op_def_b);
  // The result of AddEdge is not inspected.
  GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));

  int32_t scope_a = 1;
  int32_t scope_b = 2;
  AttrUtils::SetInt(op_def_a, "fusion_scope", scope_a);
  AttrUtils::SetInt(op_def_b, "fusion_scope", scope_b);

  // ---- report one single-node list per scope ----
  std::vector<NodePtr> scope_a_nodes(1, node_a);
  std::vector<NodePtr> scope_b_nodes(1, node_b);
  te_fusion_map[1] = scope_a_nodes;
  te_fusion_map[2] = scope_b_nodes;

  return FUSION_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief Stub: appends one default-constructed TaskDef to task_def_list_ and does nothing else.
/// @param [in] cc_handle         ignored
/// @param [in] fusion_graph      ignored
/// @param [in] buffer            ignored
/// @param [in] model_res         ignored
/// @param [out] task_def_list_   grows by one element
/// @return FUSION_STATUS_SUCCESS always
uint32_t FusionTaskBuild(cce::ccHandle_t cc_handle, ge::ComputeGraphPtr fusion_graph, ge::Buffer &buffer,
                         ModelRes &model_res, std::vector<TaskDef> &task_def_list_) {
  TaskDef placeholder_task;
  task_def_list_.push_back(placeholder_task);
  return FUSION_STATUS_SUCCESS;
}
|
|
|
/// @brief Stub: copy-assigns the graph behind orig_graph onto the graph behind fusion_graph.
/// @param [in] orig_graph     source graph; dereferenced without a null check
/// @param [out] fusion_graph  destination graph; dereferenced without a null check
/// @return FUSION_STATUS_SUCCESS always
uint32_t GraphFusion(ge::ComputeGraphPtr orig_graph, ge::ComputeGraphPtr fusion_graph) {
  // Assigns the pointee, not the pointer: the caller's fusion_graph object itself is overwritten.
  (*fusion_graph) = (*orig_graph);

  return FUSION_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: the handle list is ignored.
void FusionTaskBuildComplete(std::vector<ccHandle_t> cc_handle_list) {
}
|
|
|
|
|
|
|
} // namespace fusion |
|
|
|
|
|
|
|
/// @brief No-op stub: the quantize parameters are not stored in tensor_desc.
/// @return CC_STATUS_SUCCESS always
ccStatus_t cce::ccSetTensorDescriptorQuantizeParam(ccTensorDescriptor_t tensor_desc,
                                                   const ccVecQuantizePara_t *vec_quantize_para) {
  return CC_STATUS_SUCCESS;
}
|
|
|
|
|
|
|
/// @brief No-op stub: none of the offset or scale factors is stored.
/// @return CC_STATUS_SUCCESS always
ccStatus_t cce::ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantize_info, const uint8_t *offset_w,
                                              const uint8_t *offset_d, const uint16_t *scale_req,
                                              const uint16_t *offset_d_next) {
  return CC_STATUS_SUCCESS;
}