From: @HW_KK Reviewed-by: @liujunzhu,@youui Signed-off-by: @youuir1.0
| @@ -0,0 +1,134 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /** | |||||
| * @file hccl.h | |||||
| * @brief HCCL API | |||||
| */ | |||||
| #ifndef HCCL_H_ | |||||
| #define HCCL_H_ | |||||
| #include <hccl/hccl_types.h> | |||||
| #include <acl/acl.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif // __cplusplus | |||||
| /** | |||||
| * @brief Initialize HCCL. | |||||
| * | |||||
| * @param clusterInfo A string identifying the cluster info file path, include file name. | |||||
| * @param rank A integer identifying the identify for the rank. | |||||
| * @param comm A pointer identifying the initialized communication resource. | |||||
| * @return HcclResult | |||||
| * @see HcclCommDestroy() | |||||
| */ | |||||
| extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); | |||||
| /** | |||||
| * @brief Get hccl root info. | |||||
| * | |||||
| * @param rootInfo A pointer identifying the hccl root info. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); | |||||
| /** | |||||
| * @brief Initialize HCCL with root info. | |||||
| * | |||||
| * @param nRanks A integer identifying the rank size of the cluster. | |||||
| * @param rootInfo A struct identifying the hccl root info. | |||||
| * @param rank A integer identifying the identify for the rank. | |||||
| * @param comm A pointer identifying the initialized communication resource. | |||||
| * @return HcclResult | |||||
| * @see HcclCommDestroy() | |||||
| */ | |||||
| extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); | |||||
| /** | |||||
| * @brief AllReduce operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the output data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||||
| * float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param comm A pointer identifying the communication resource based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||||
| HcclComm comm, aclrtStream stream); | |||||
| /** | |||||
| * @brief Broadcast operator. | |||||
| * | |||||
| * @param buf A pointer identifying the data address of the operator. | |||||
| * @param count An integer(u64) identifying the number of the data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param root An integer(u32) identifying the the root rank in the operator. | |||||
| * @param comm A pointer identifying the communication resource based on | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @brief ReduceScatter operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param recvCount An integer(u64) identifying the number of the output data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||||
| * @param comm A pointer identifying the communication resource based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
| /** | |||||
| * @brief AllGather operator. | |||||
| * | |||||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||||
| * @param sendCount An integer(u64) identifying the number of the input data. | |||||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||||
| * @param comm A pointer identifying the communication resource based on. | |||||
| * @param stream A pointer identifying the stream information. | |||||
| * @return HcclResult | |||||
| */ | |||||
| extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||||
| aclrtStream stream); | |||||
| /** | |||||
| * @brief Destroy HCCL comm | |||||
| * | |||||
| * @param comm A pointer identifying the communication resource targetting | |||||
| * @return HcclResult | |||||
| * @see HcclCommInitClusterInfo() | |||||
| */ | |||||
| extern HcclResult HcclCommDestroy(HcclComm comm); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_H_ | |||||
| @@ -0,0 +1,101 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| /** | |||||
| * @file hccl_types.h | |||||
| * @brief HCCL data type definition | |||||
| * | |||||
| */ | |||||
| #ifndef HCCL_TYPES_H_ | |||||
| #define HCCL_TYPES_H_ | |||||
| #include <stdint.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif // __cplusplus | |||||
| /** | |||||
| * @brief HCCL functions return value definition | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_SUCCESS = 0, /**< success */ | |||||
| HCCL_E_PARA = 1, /**< parameter error */ | |||||
| HCCL_E_PTR = 2, /**< empty pointer */ | |||||
| HCCL_E_MEMORY = 3, /**< memory error */ | |||||
| HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
| HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
| HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
| HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
| HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
| HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
| HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
| HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
| HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
| HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
| HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
| HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
| HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
| HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
| HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
| HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
| HCCL_E_RESERVED /**< reserved */ | |||||
| } HcclResult; | |||||
| /** | |||||
| * @brief handle to HCCL communicator | |||||
| */ | |||||
| typedef void *HcclComm; | |||||
| /** | |||||
| * @brief HCCL Reduction opperation | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
| HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
| HCCL_REDUCE_MAX = 2, /**< max */ | |||||
| HCCL_REDUCE_MIN = 3, /**< min */ | |||||
| HCCL_REDUCE_RESERVED /**< reserved */ | |||||
| } HcclReduceOp; | |||||
| /** | |||||
| * @brief HCCL data type | |||||
| */ | |||||
| typedef enum { | |||||
| HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
| HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
| HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
| HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
| HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
| HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
| HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
| HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
| } HcclDataType; | |||||
| const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
| /** | |||||
| * @brief HCCL root info | |||||
| */ | |||||
| typedef struct HcclRootInfoDef { | |||||
| char internal[HCCL_ROOT_INFO_BYTES]; | |||||
| } HcclRootInfo; | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif // __cplusplus | |||||
| #endif // HCCL_TYPES_H_ | |||||
| @@ -790,22 +790,24 @@ Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { | |||||
| } | } | ||||
| Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | ||||
| static int index = 0; | |||||
| std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | ||||
| GeShape input_shape(input_shape_dim); | GeShape input_shape(input_shape_dim); | ||||
| // construct input tensor | // construct input tensor | ||||
| GeTensorDesc input_tensor(input_shape, FORMAT_ND, DT_UINT8); | GeTensorDesc input_tensor(input_shape, FORMAT_ND, DT_UINT8); | ||||
| TensorUtils::SetReuseInput(input_tensor, false); | TensorUtils::SetReuseInput(input_tensor, false); | ||||
| TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | ||||
| GE_CHECK_NOTNULL(aipp_node); | |||||
| const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | ||||
| string node_name; | string node_name; | ||||
| if (index == 0) { | |||||
| // First aippdata name should be definite. | |||||
| if (graph->FindFirstNodeMatchType(AIPPDATA) == nullptr) { | |||||
| GELOGI("Current graph has no aippdata node, so the name of it must be definite."); | |||||
| node_name = kDynamicAippData; | node_name = kDynamicAippData; | ||||
| } else { | } else { | ||||
| node_name = string(kDynamicAippData) + "_" + to_string(index); | |||||
| node_name = string(kDynamicAippData) + "_" + aipp_node->GetName(); | |||||
| } | } | ||||
| ++index; | |||||
| GELOGI("Current add aippdata node name is %s", node_name.c_str()); | |||||
| // new add aipp_data ops for dynamic aipp param input | // new add aipp_data ops for dynamic aipp param input | ||||
| OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | ||||
| GE_CHECK_NOTNULL(op_desc_ptr_data); | GE_CHECK_NOTNULL(op_desc_ptr_data); | ||||
| @@ -1 +0,0 @@ | |||||
| ../../proto/task.proto | |||||
| @@ -0,0 +1,170 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| package domi; | |||||
| message ModelTaskDef { | |||||
| string version = 1; | |||||
| map<string, string> attr = 9; // Extended field | |||||
| repeated TaskDef task = 10; | |||||
| uint64 memory_size = 11; | |||||
| uint32 stream_num = 12; | |||||
| uint32 event_num = 13; | |||||
| uint64 weight_size = 14; | |||||
| repeated bytes op = 15; // input/output opdef in bytes | |||||
| uint64 base_addr = 16; // base addr | |||||
| uint64 weight_addr = 17; // weight addr | |||||
| uint32 batch_num = 18; | |||||
| } | |||||
| message TaskDef { | |||||
| uint32 id = 1; | |||||
| uint32 type = 2; | |||||
| uint32 stream_id = 10; | |||||
| uint32 event_id = 11; | |||||
| KernelDef kernel = 20; | |||||
| KernelExDef kernel_ex = 21; | |||||
| KernelHcclDef kernel_hccl = 25; | |||||
| EventExDef event_ex = 26; | |||||
| LogTimeStampDef log_timestamp = 28; | |||||
| uint32 label_id = 30; | |||||
| MemcpyAsyncDef memcpy_async = 31; | |||||
| StreamSwitchDef stream_switch = 32; | |||||
| StreamActiveDef stream_active = 33; | |||||
| bytes private_def = 34; | |||||
| uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future | |||||
| StreamSwitchNDef stream_switch_n = 36; | |||||
| LabelSetDef label_set = 37; | |||||
| LabelGotoExDef label_goto_ex = 38; | |||||
| LabelSwitchByIndexDef label_switch_by_index = 39; | |||||
| } | |||||
| message KernelDef { | |||||
| KernelContext context = 1; | |||||
| string stub_func = 10; | |||||
| uint32 block_dim = 11; | |||||
| uint32 args_size = 12; | |||||
| bytes args = 13; | |||||
| bytes sm_desc = 14; | |||||
| bytes flowtable = 15; | |||||
| string so_name = 16; | |||||
| string kernel_name = 17; | |||||
| bytes kernel_ext_info = 18; | |||||
| uint32 kernel_ext_info_size = 19; | |||||
| } | |||||
| message KernelContext { | |||||
| uint32 kernel_type = 1; | |||||
| uint32 op_id = 2; // OP type in CCE | |||||
| uint32 kernel_func_id = 3; | |||||
| uint32 op_index = 4; // TE/Custom operator | |||||
| bool is_flowtable = 5; // Identify whether args is a flowtable structure | |||||
| bytes args_offset = 6; // args offset information | |||||
| uint32 args_count = 7; // args count | |||||
| repeated uint32 origin_op_index = 8; | |||||
| } | |||||
| message KernelExDef { | |||||
| uint32 flags = 1; | |||||
| uint32 op_index = 4; | |||||
| uint32 args_size = 12; | |||||
| bytes args = 13; | |||||
| bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput | |||||
| uint32 task_info_size = 15; | |||||
| bytes kernel_ext_info = 16; | |||||
| uint32 kernel_ext_info_size = 17; | |||||
| } | |||||
| message KernelHcclDef { | |||||
| uint32 op_index = 8; | |||||
| string hccl_type = 9; | |||||
| } | |||||
| message EventExDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 event_type = 2; | |||||
| } | |||||
| message LogTimeStampDef { | |||||
| uint64 logid = 1; | |||||
| bool notify = 2; | |||||
| uint32 flat = 3; | |||||
| } | |||||
| message MemcpyAsyncDef { | |||||
| uint64 dst = 1; | |||||
| uint64 dst_max = 2; | |||||
| uint64 src = 3; | |||||
| uint64 count = 4; | |||||
| uint32 kind = 5; | |||||
| uint32 op_index = 6; | |||||
| } | |||||
| message StreamSwitchDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 true_stream_id = 2; | |||||
| int64 value = 3; | |||||
| uint64 value_ptr = 4; | |||||
| uint32 data_type = 5; | |||||
| } | |||||
| message StreamActiveDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 active_stream_id = 2; | |||||
| } | |||||
| message StreamSwitchNDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 size = 2; | |||||
| repeated int64 target_value = 3; | |||||
| repeated uint32 true_stream_id = 4; | |||||
| uint32 element_size = 5; | |||||
| uint32 data_type = 6; | |||||
| } | |||||
| message LabelSetDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 label_id = 2; | |||||
| uint32 model_id = 3; | |||||
| } | |||||
| message LabelGotoExDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 label_id = 2; | |||||
| uint32 model_id = 3; | |||||
| } | |||||
| message LabelSwitchByIndexDef { | |||||
| uint32 op_index = 1; | |||||
| uint32 label_max = 2; | |||||
| } | |||||
| @@ -889,14 +889,29 @@ REG_OP(ReadVariableOp) | |||||
| .ATTR(dtype, Int, DT_INT32) | .ATTR(dtype, Int, DT_INT32) | ||||
| .OP_END_FACTORY_REG(ReadVariableOp) | .OP_END_FACTORY_REG(ReadVariableOp) | ||||
| /** | |||||
| *@brief Mark outputs of one sub graph which partitioned by engine type. | |||||
| *@par Inputs: | |||||
| *x: A tensor. \n | |||||
| *@par Outputs: | |||||
| *y: A tensor. \n | |||||
| *@par Attributes: | |||||
| *@li peerIndex: The index of the corresponding 'placeholder' node it's connected to. | |||||
| *@li parentOpType: Op type of original node. | |||||
| *@par Restrictions: | |||||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
| */ | |||||
| REG_OP(End) | REG_OP(End) | ||||
| .INPUT(x, TensorType::ALL()) | .INPUT(x, TensorType::ALL()) | ||||
| .OUTPUT(y, TensorType::ALL()) | .OUTPUT(y, TensorType::ALL()) | ||||
| .ATTR(peerIndex, Int, 0) // the index of the corresponding 'placeholder' node it's connected to | |||||
| .ATTR(parentOpType, String, "") // op type of original node | |||||
| .ATTR(peerIndex, Int, 0) | |||||
| .ATTR(parentOpType, String, "") | |||||
| .OP_END_FACTORY_REG(End) | .OP_END_FACTORY_REG(End) | ||||
| /** | /** | ||||
| *@brief Operations for writing summary data, for use in analysis and visualization. | *@brief Operations for writing summary data, for use in analysis and visualization. | ||||