| @@ -0,0 +1,134 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| /** | |||
| * @file hccl.h | |||
| * @brief HCCL API | |||
| */ | |||
| #ifndef HCCL_H_ | |||
| #define HCCL_H_ | |||
| #include <hccl/hccl_types.h> | |||
| #include <acl/acl.h> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| /** | |||
| * @brief Initialize HCCL from a cluster info file. | |||
| * | |||
| * @param clusterInfo A string giving the path of the cluster info file, including the file name. | |||
| * @param rank An integer identifying the rank. | |||
| * @param comm A pointer to the HcclComm handle for the initialized communication resource. | |||
| * @return HcclResult | |||
| * @see HcclCommDestroy() | |||
| */ | |||
| extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); | |||
| /** | |||
| * @brief Get hccl root info. | |||
| * | |||
| * @param rootInfo A pointer to the HcclRootInfo structure identifying the hccl root info. | |||
| * @return HcclResult | |||
| * @see HcclCommInitRootInfo() | |||
| */ | |||
| extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); | |||
| /** | |||
| * @brief Initialize HCCL with root info. | |||
| * | |||
| * @param nRanks An integer identifying the rank size of the cluster. | |||
| * @param rootInfo A pointer to the (const) hccl root info struct. | |||
| * @param rank An integer identifying the rank. | |||
| * @param comm A pointer to the HcclComm handle for the initialized communication resource. | |||
| * @return HcclResult | |||
| * @see HcclCommDestroy() | |||
| */ | |||
| extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); | |||
| /** | |||
| * @brief AllReduce operator. | |||
| * | |||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the output data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||
| * float32. | |||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||
| HcclComm comm, aclrtStream stream); | |||
| /** | |||
| * @brief Broadcast operator. | |||
| * | |||
| * @param buf A pointer identifying the data address of the operator. | |||
| * @param count An integer(u64) identifying the number of the data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param root An integer(u32) identifying the root rank in the operator. | |||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||
| aclrtStream stream); | |||
| /** | |||
| * @brief ReduceScatter operator. | |||
| * | |||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||
| * @param recvCount An integer(u64) identifying the number of the output data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||
| HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
| /** | |||
| * @brief AllGather operator. | |||
| * | |||
| * @param sendBuf A pointer identifying the input data address of the operator. | |||
| * @param recvBuf A pointer identifying the output data address of the operator. | |||
| * @param sendCount An integer(u64) identifying the number of the input data. | |||
| * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
| * @param comm A pointer identifying the communication resource the operator is based on. | |||
| * @param stream A pointer identifying the stream information. | |||
| * @return HcclResult | |||
| */ | |||
| extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||
| aclrtStream stream); | |||
| /** | |||
| * @brief Destroy HCCL comm. | |||
| * | |||
| * @param comm A pointer identifying the communication resource to destroy. | |||
| * @return HcclResult | |||
| * @see HcclCommInitClusterInfo() | |||
| */ | |||
| extern HcclResult HcclCommDestroy(HcclComm comm); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif // __cplusplus | |||
| #endif // HCCL_H_ | |||
| @@ -0,0 +1,101 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| /** | |||
| * @file hccl_types.h | |||
| * @brief HCCL data type definition | |||
| * | |||
| */ | |||
| #ifndef HCCL_TYPES_H_ | |||
| #define HCCL_TYPES_H_ | |||
| #include <stdint.h> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif // __cplusplus | |||
| /** | |||
| * @brief Return values of the HCCL functions (HCCL_SUCCESS is 0, the HCCL_E_* values follow from 1). | |||
| */ | |||
| typedef enum { | |||
| HCCL_SUCCESS = 0, /**< success */ | |||
| HCCL_E_PARA = 1, /**< parameter error */ | |||
| HCCL_E_PTR = 2, /**< empty pointer */ | |||
| HCCL_E_MEMORY = 3, /**< memory error */ | |||
| HCCL_E_INTERNAL = 4, /**< internal error */ | |||
| HCCL_E_NOT_SUPPORT = 5, /**< feature not supported */ | |||
| HCCL_E_NOT_FOUND = 6, /**< specific resource not found */ | |||
| HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||
| HCCL_E_SYSCALL = 8, /**< system interface call error */ | |||
| HCCL_E_TIMEOUT = 9, /**< timeout */ | |||
| HCCL_E_OPEN_FILE_FAILURE = 10, /**< failed to open file */ | |||
| HCCL_E_TCP_CONNECT = 11, /**< tcp connect failed */ | |||
| HCCL_E_ROCE_CONNECT = 12, /**< roce connect failed */ | |||
| HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer failed */ | |||
| HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer failed */ | |||
| HCCL_E_RUNTIME = 15, /**< runtime api call failed */ | |||
| HCCL_E_DRV = 16, /**< driver api call failed */ | |||
| HCCL_E_PROFILING = 17, /**< profiling api call failed */ | |||
| HCCL_E_CCE = 18, /**< cce api call failed */ | |||
| HCCL_E_NETWORK = 19, /**< network api call failed */ | |||
| HCCL_E_RESERVED /**< reserved; no explicit value, so it takes 20 (one past HCCL_E_NETWORK) */ | |||
| } HcclResult; | |||
| /** | |||
| * @brief Handle to an HCCL communicator; declared as an untyped (void *) pointer. | |||
| */ | |||
| typedef void *HcclComm; | |||
| /** | |||
| * @brief HCCL reduction operation | |||
| */ | |||
| typedef enum { | |||
| HCCL_REDUCE_SUM = 0, /**< sum */ | |||
| HCCL_REDUCE_PROD = 1, /**< prod */ | |||
| HCCL_REDUCE_MAX = 2, /**< max */ | |||
| HCCL_REDUCE_MIN = 3, /**< min */ | |||
| HCCL_REDUCE_RESERVED /**< reserved; no explicit value, so it takes 4 */ | |||
| } HcclReduceOp; | |||
| /** | |||
| * @brief HCCL data type identifiers | |||
| */ | |||
| typedef enum { | |||
| HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||
| HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||
| HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||
| HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||
| HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||
| HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||
| HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||
| HCCL_DATA_TYPE_RESERVED /**< reserved; no explicit value, so it takes 7 */ | |||
| } HcclDataType; | |||
| const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||
| /** | |||
| * @brief HCCL root info | |||
| */ | |||
| typedef struct HcclRootInfoDef { | |||
| char internal[HCCL_ROOT_INFO_BYTES]; | |||
| } HcclRootInfo; | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif // __cplusplus | |||
| #endif // HCCL_TYPES_H_ | |||
| @@ -790,22 +790,24 @@ Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { | |||
| } | |||
| Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | |||
| static int index = 0; | |||
| std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | |||
| GeShape input_shape(input_shape_dim); | |||
| // construct input tensor | |||
| GeTensorDesc input_tensor(input_shape, FORMAT_ND, DT_UINT8); | |||
| TensorUtils::SetReuseInput(input_tensor, false); | |||
| TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | |||
| GE_CHECK_NOTNULL(aipp_node); | |||
| const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | |||
| string node_name; | |||
| if (index == 0) { | |||
| // First aippdata name should be definite. | |||
| if (graph->FindFirstNodeMatchType(AIPPDATA) == nullptr) { | |||
| GELOGI("Current graph has no aippdata node, so the name of it must be definite."); | |||
| node_name = kDynamicAippData; | |||
| } else { | |||
| node_name = string(kDynamicAippData) + "_" + to_string(index); | |||
| node_name = string(kDynamicAippData) + "_" + aipp_node->GetName(); | |||
| } | |||
| ++index; | |||
| GELOGI("Current add aippdata node name is %s", node_name.c_str()); | |||
| // new add aipp_data ops for dynamic aipp param input | |||
| OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | |||
| GE_CHECK_NOTNULL(op_desc_ptr_data); | |||
| @@ -1 +0,0 @@ | |||
| ../../proto/task.proto | |||
| @@ -0,0 +1,170 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| syntax = "proto3"; | |||
| package domi; | |||
| message ModelTaskDef { /* Holds the repeated TaskDef list plus scalar size, count and address fields. */ | |||
| string version = 1; | |||
| map<string, string> attr = 9; // Extended field | |||
| repeated TaskDef task = 10; | |||
| uint64 memory_size = 11; | |||
| uint32 stream_num = 12; | |||
| uint32 event_num = 13; | |||
| uint64 weight_size = 14; | |||
| repeated bytes op = 15; // input/output opdef in bytes | |||
| uint64 base_addr = 16; // base address | |||
| uint64 weight_addr = 17; // weight address | |||
| uint32 batch_num = 18; | |||
| } | |||
| message TaskDef { /* Element type of ModelTaskDef.task; has one sub-message field for each ...Def message declared after it. */ | |||
| uint32 id = 1; | |||
| uint32 type = 2; /* NOTE(review): presumably selects which sub-message below is meaningful; confirm against the consumer */ | |||
| uint32 stream_id = 10; | |||
| uint32 event_id = 11; | |||
| KernelDef kernel = 20; | |||
| KernelExDef kernel_ex = 21; | |||
| KernelHcclDef kernel_hccl = 25; | |||
| EventExDef event_ex = 26; | |||
| LogTimeStampDef log_timestamp = 28; | |||
| uint32 label_id = 30; | |||
| MemcpyAsyncDef memcpy_async = 31; | |||
| StreamSwitchDef stream_switch = 32; | |||
| StreamActiveDef stream_active = 33; | |||
| bytes private_def = 34; | |||
| uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future | |||
| StreamSwitchNDef stream_switch_n = 36; | |||
| LabelSetDef label_set = 37; | |||
| LabelGotoExDef label_goto_ex = 38; | |||
| LabelSwitchByIndexDef label_switch_by_index = 39; | |||
| } | |||
| message KernelDef { /* Type of the TaskDef.kernel field. */ | |||
| KernelContext context = 1; | |||
| string stub_func = 10; | |||
| uint32 block_dim = 11; | |||
| uint32 args_size = 12; | |||
| bytes args = 13; | |||
| bytes sm_desc = 14; | |||
| bytes flowtable = 15; | |||
| string so_name = 16; | |||
| string kernel_name = 17; | |||
| bytes kernel_ext_info = 18; | |||
| uint32 kernel_ext_info_size = 19; | |||
| } | |||
| message KernelContext { /* Type of the KernelDef.context field. */ | |||
| uint32 kernel_type = 1; | |||
| uint32 op_id = 2; // OP type in CCE | |||
| uint32 kernel_func_id = 3; | |||
| uint32 op_index = 4; // TE/Custom operator | |||
| bool is_flowtable = 5; // Identifies whether args is a flowtable structure | |||
| bytes args_offset = 6; // args offset information | |||
| uint32 args_count = 7; // args count | |||
| repeated uint32 origin_op_index = 8; | |||
| } | |||
| message KernelExDef { /* Type of the TaskDef.kernel_ex field. */ | |||
| uint32 flags = 1; | |||
| uint32 op_index = 4; | |||
| uint32 args_size = 12; | |||
| bytes args = 13; | |||
| bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput | |||
| uint32 task_info_size = 15; | |||
| bytes kernel_ext_info = 16; | |||
| uint32 kernel_ext_info_size = 17; | |||
| } | |||
| message KernelHcclDef { /* Type of the TaskDef.kernel_hccl field. */ | |||
| uint32 op_index = 8; | |||
| string hccl_type = 9; | |||
| } | |||
| message EventExDef { /* Type of the TaskDef.event_ex field. */ | |||
| uint32 op_index = 1; | |||
| uint32 event_type = 2; | |||
| } | |||
| message LogTimeStampDef { /* Type of the TaskDef.log_timestamp field. */ | |||
| uint64 logid = 1; | |||
| bool notify = 2; | |||
| uint32 flat = 3; /* NOTE(review): possibly a typo for "flag"; confirm before renaming */ | |||
| } | |||
| message MemcpyAsyncDef { /* Type of the TaskDef.memcpy_async field. */ | |||
| uint64 dst = 1; | |||
| uint64 dst_max = 2; | |||
| uint64 src = 3; | |||
| uint64 count = 4; | |||
| uint32 kind = 5; | |||
| uint32 op_index = 6; | |||
| } | |||
| message StreamSwitchDef { /* Type of the TaskDef.stream_switch field. */ | |||
| uint32 op_index = 1; | |||
| uint32 true_stream_id = 2; | |||
| int64 value = 3; | |||
| uint64 value_ptr = 4; | |||
| uint32 data_type = 5; | |||
| } | |||
| message StreamActiveDef { /* Type of the TaskDef.stream_active field. */ | |||
| uint32 op_index = 1; | |||
| uint32 active_stream_id = 2; | |||
| } | |||
| message StreamSwitchNDef { /* Type of the TaskDef.stream_switch_n field. */ | |||
| uint32 op_index = 1; | |||
| uint32 size = 2; | |||
| repeated int64 target_value = 3; | |||
| repeated uint32 true_stream_id = 4; | |||
| uint32 element_size = 5; | |||
| uint32 data_type = 6; | |||
| } | |||
| message LabelSetDef { /* Type of the TaskDef.label_set field. */ | |||
| uint32 op_index = 1; | |||
| uint32 label_id = 2; | |||
| uint32 model_id = 3; | |||
| } | |||
| message LabelGotoExDef { /* Type of the TaskDef.label_goto_ex field. */ | |||
| uint32 op_index = 1; | |||
| uint32 label_id = 2; | |||
| uint32 model_id = 3; | |||
| } | |||
| message LabelSwitchByIndexDef { /* Type of the TaskDef.label_switch_by_index field. */ | |||
| uint32 op_index = 1; | |||
| uint32 label_max = 2; | |||
| } | |||
| @@ -889,14 +889,29 @@ REG_OP(ReadVariableOp) | |||
| .ATTR(dtype, Int, DT_INT32) | |||
| .OP_END_FACTORY_REG(ReadVariableOp) | |||
| /** | |||
| *@brief Marks the outputs of a subgraph that has been partitioned by engine type. | |||
| *@par Inputs: | |||
| *x: A tensor. \n | |||
| *@par Outputs: | |||
| *y: A tensor. \n | |||
| *@par Attributes: | |||
| *@li peerIndex: The index of the corresponding 'placeholder' node it's connected to. | |||
| *@li parentOpType: Op type of original node. | |||
| *@par Restrictions: | |||
| *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
| */ | |||
| REG_OP(End) | |||
| .INPUT(x, TensorType::ALL()) | |||
| .OUTPUT(y, TensorType::ALL()) | |||
| .ATTR(peerIndex, Int, 0) // the index of the corresponding 'placeholder' node it's connected to | |||
| .ATTR(parentOpType, String, "") // op type of original node | |||
| .ATTR(peerIndex, Int, 0) | |||
| .ATTR(parentOpType, String, "") | |||
| .OP_END_FACTORY_REG(End) | |||
| /** | |||
| *@brief Operations for writing summary data, for use in analysis and visualization. | |||