diff --git a/CMakeLists.txt b/CMakeLists.txt index 86d0184b..bea12fcc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,7 +80,7 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) + #find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH}) #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) else() find_module(slog libslog.so ${ASCEND_ATC_DIR}) @@ -92,7 +92,7 @@ if (ENABLE_OPEN_SRC) find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) if(PRODUCT STREQUAL "flr3") message(FATAL_ERROR "This platform is not supported in train mode, build terminated") @@ -115,7 +115,7 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR}) + find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) @@ -123,7 +123,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR}) + #find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 59b804d8..90c341d5 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -603,6 +603,21 @@ set(INFER_SRC_LIST "analyzer/analyzer.cc" ) +if (ENABLE_OPEN_SRC) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) + if(EXISTS ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a) + execute_process( + COMMAND ar x ${ASCEND_RUNTIME_DIR}/libmsprofiler_fwk.a + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object + ) + file(GLOB msprof_file ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) + else() + file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc CONTENT "") + set(msprof_file ${CMAKE_BINARY_DIR}/msprofiler_fwk.cc) + endif() + add_library(msprofiler_fwk OBJECT ${msprof_file}) +endif() + if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $) diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h new file mode 100644 index 00000000..7e0f94a8 --- /dev/null +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h index c3672663..72e21f6f 100644 --- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h +++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h @@ -21,13 +21,15 @@ namespace aicpu { +#pragma pack(push, 1) struct AicpuParamHead { uint32_t length; // Total length: include cunstom message uint32_t ioAddrNum; // Input and output address number uint32_t extInfoLength; // extInfo struct Length uint64_t extInfoAddr; // extInfo address -} __attribute__ ((packed)); +}; +#pragma pack(pop) } // namespace aicpu diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h index 740f1200..b83731a8 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -13,10 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef AICPU_ENGINE_H__ #define AICPU_ENGINE_H__ +#include + #ifdef __cplusplus extern "C" { #endif @@ -36,12 +37,23 @@ typedef enum { /** * @ingroup aicpu engine * @brief aeCallInterface: - * a interface to call a function in a op kernfel lib + * a interface to call a function in a op kernfel lib * @param [in] addr void *, should be STR_KERNEL * format * @return aeStatus_t */ aeStatus_t aeCallInterface(void *addr); +/** + * @ingroup aicpu engine + * @brief aeBatchLoadKernelSo: + * a interface to load kernel so + * @param [in] loadSoNum load so number + * @param [in] soPaths load so paths + * @param [in] soNames load so names + * @return aeStatus_t + */ +aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]); + #ifdef __cplusplus } #endif diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index a5f43be9..8c0c1847 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -33,18 +33,22 @@ typedef enum { FMK_KERNEL_TYPE_RESERVED } FwkkernelType_t; +#pragma pack(push, 1) typedef struct { uint32_t fwkKernelType; // FwkkernelType_t union { ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel; } fwkKernelBase; -} __attribute__((packed)) STR_FWK_OP_KERNEL; +} STR_FWK_OP_KERNEL; +#pragma pack(pop) +#pragma pack(push, 1) struct SessionInfo { uint64_t sessionId; uint64_t kernelId; bool sessFlag; -} __attribute__((packed)); +}; +#pragma pack(pop) #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 79d94023..50b39d91 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType { FWK_ADPT_UPDATE_INPUT_OUTPUT }; +#pragma pack(push, 1) // API Parameter Structure struct StrFWKKernel { FWKOperateType opType; @@ -89,31 +90,39 @@ struct StrFWKKernel { uint64_t extInfoLen; // extend info total length uint64_t extInfoAddr; // extend info addr, ExtInfo structure -} __attribute__((packed)); +}; +#pragma pack(pop) typedef StrFWKKernel FWKOperateParam; // Extent info ShapeAndType const uint32_t kMaxShapeDims = 8; +#pragma pack(push, 1) struct ShapeAndType { int32_t type; int64_t dims[kMaxShapeDims]; -} __attribute__((packed)); +}; +#pragma pack(pop) // Extend info structure for extInfoAddr const uint32_t kExtInfoHeadSize = 8; + +#pragma pack(push, 1) struct ExtInfo { int32_t infoType; // extend type uint32_t infoLen; // length for infoMsg char infoMsg[0]; // extend value -} __attribute__((packed)); +}; +#pragma pack(pop) +#pragma pack(push, 1) struct ResultSummary { uint64_t shape_data_ptr; // shape data addr, need convert to void* uint64_t shape_data_size; // num of dims uint64_t raw_data_ptr; // raw data addr, need convert to void* uint64_t raw_data_size; // size of raw data -} __attribute__((packed)); +}; +#pragma pack(pop) } // end namespace FWKAdapter } // namespace aicpu diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 8194097e..9facd20c 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -22,7 +22,8 @@ #ifndef HCCL_BASE_H_ #define HCCL_BASE_H_ - +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -95,6 +96,33 @@ typedef void *rtStream_t; */ typedef void *rtModel_t; +struct HcomOperation { + std::string hcclType; + void *inputPtr; + void *outputPtr; + u64 count; + HcclDataType dataType; + HcclReduceOp opType; + u32 root; + + HcomOperation() + { + inputPtr = nullptr; + outputPtr = nullptr; + count = 0; + dataType = HCCL_DATA_TYPE_RESERVED; + opType = HCCL_REDUCE_RESERVED; + root = 0; + } +}; + +struct HcomRemoteAccessAddrInfo { + u32 remotetRankID; + u64 remoteAddr; // host embedding table address + u64 localAddr; // device HBM address + u64 length; // Memory Length in Bytes +}; + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 90b96ac7..e491d43f 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -24,145 +24,96 @@ #include #include +#include +#include #ifdef __cplusplus extern "C" { #endif // __cplusplus -/** - * @brief Initialize HCOM. - * - * @param rank_table A string identifying the rank table file path, include file name. - * @param identify A string identifying the identify for the rank. - * @return HcclResult - * @see hcom_destroy() - */ -extern HcclResult hcom_init(const char *rank_table, const char *identify); -/** - * @brief Destroy HCOM - * - * @return HcclResult - * @see hcom_init() - */ -extern HcclResult hcom_destroy(void); - -/** - * @brief Bind the model. - * - * @param model A pointer identifying the model information. - * @param stream A pointer identifying the stream information. - * @return HcclResult - * @see hcom_unbind_model() - */ -extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream); /** - * @brief Unbind the model. + * @brief Get the rank number in the group. * - * @param model An pointer identifying the model information. - * @return HcclResult - * @see hcom_unbind_model() + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. + * @return HcclResult */ -extern HcclResult hcom_unbind_model(rtModel_t model); +HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); /** - * @brief All-gather operator. + * @brief Get the rank number in the group. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param inputCount An integer(u64) identifying the number of the input data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount, - HcclDataType dataType, const char *group, rtStream_t stream); +HcclResult HcomGetRankSize(const char *group, u32 *rankSize); /** - * @brief All-reduce operator. + * @brief Get the rank number of this rank's server within the group. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count, - HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); +HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); /** - * @brief Broadcast operator. + * @brief Get the rank number of this rank's server within the group. * - * @param tag A string identifying the tag of the operator. - * @param ptr A pointer identifying the data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param root An integer(u32) identifying the the root rank in the operator. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. * @return HcclResult */ -extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root, - const char *group, rtStream_t stream); +HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); /** - * @brief Reduce-scatter operator. + * @brief Get the rank id of this rank. * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. * @return HcclResult */ -extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count, - HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream); +HcclResult hcom_get_rank_id(const char *group, u32 *rankId); /** - * @brief Get the rank number in the group. + * @brief Get the rank id of this rank. * * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. + * @param rankId A pointer identifying the rank id. * @return HcclResult */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); +HcclResult HcomGetRankId(const char *group, u32 *rankId); /** - * @brief Get the rank number of this rank's server within the group. + * @brief Get the local rank id of this rank's server within the group. * * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. + * @param localRankId A pointer identifying the local rank id. * @return HcclResult */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); +HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); /** - * @brief Get the rank id of this rank. + * @brief Get the local rank id of this rank's server within the group. * * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. + * @param localRankId A pointer identifying the local rank id. * @return HcclResult */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); +HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); /** - * @brief Get the local rank id of this rank's server within the group. + * @brief Get the world rank id according to the group rank id. * * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. + * @param groupRank An integer(u32) identifying the group rank id. + * @param worldRank A pointer identifying the world rank id. * @return HcclResult */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); +HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); /** * @brief Get the world rank id according to the group rank id. @@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); * @param worldRank A pointer identifying the world rank id. * @return HcclResult */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); +HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); /** * @brief Get the group rank id according to the world rank id. @@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return HcclResult + */ +HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); + /** * @brief Create group. * @@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); /** - * @brief Destroy group + * @brief Create group. * * @param group A string identifying the group name. + * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. * @return HcclResult */ -HcclResult hcom_destroy_group(const char *group); +HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); /** - * @brief Send operator. + * @brief Destroy group * - * @param tag A string identifying the tag of the operator. - * @param inputPtr A pointer identifying the input data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param destRank An integer identifying the destination rank. - * @param srTag An integer identifying the send/recv message tag. - * The message will be send by the receive operator with the same "sr_tag". - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. * @return HcclResult */ -HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType, - u32 destRank, u32 srTag, const char *group, rtStream_t stream); +HcclResult hcom_destroy_group(const char *group); /** - * @brief Receive operator. + * @brief Destroy group * - * @param tag A string identifying the tag of the operator. - * @param outputPtr A pointer identifying the output data address of the operator. - * @param count An integer(u64) identifying the number of the data. - * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. - * @param srcRank An integer identifying the source rank. - * @param srTag An integer identifying the send/recv message tag. - * The message will be send by the send operator with the same "sr_tag". - * @param group A string identifying the group name of ranks participating in the operator. - * @param stream A pointer identifying the stream information. + * @param group A string identifying the group name. * @return HcclResult */ -HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType, - u32 srcRank, u32 srTag, const char *group, rtStream_t stream); +HcclResult HcomDestroyGroup(const char *group); /** - * @brief Get the gradient split strategy with in the group. + * @brief Set the gradient split strategy with in the group, according to gradient index. * * @param group A string identifying the group name. - * @param feature A pointer identifying the feature of the model. - * @param maxSegmentNum An integer(u32) identifying the max segments of gradients. - * @param segmentNum A pointer identifying the segments number of gradients. - * @param segmentIdx A list identifying the index of end gradient in each segment. - * @return HcclResult + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return HcclResult */ -HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum, - u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE, - OriginalGraphShapeType shapeType = KNOWN_SHAPE); +extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); /** * @brief Set the gradient split strategy with in the group, according to gradient index. @@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** * @brief Set the gradient split strategy with in the group, according to gradient data size. @@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen */ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +/** + * @brief Set the gradient split strategy with in the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. + * @return HcclResult + */ +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); + /** * @brief Register memories and init resources for remote access. * @@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment */ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +/** + * @brief Register memories and init resources for remote access. + * + * @param addrList memory addresses for remote access. + * @param count number of remote memory addresses. + * @return HcclResult + */ +extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); + +HcclResult HcomExecInitialize(); + +HcclResult HcomExecFinalize(); + +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); + #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index ea51f497..ad48f70b 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -215,6 +215,10 @@ typedef struct { #define S_IWRITE S_IWUSR #endif +#define mm_no_argument no_argument +#define mm_required_argument required_argument +#define mm_optional_argument optional_argument + #define M_FILE_RDONLY O_RDONLY #define M_FILE_WRONLY O_WRONLY #define M_FILE_RDWR O_RDWR @@ -227,6 +231,7 @@ typedef struct { #define M_BINARY O_RDONLY #define M_TRUNC O_TRUNC #define M_IRWXU S_IRWXU +#define M_APPEND O_APPEND #define M_IN_CREATE IN_CREATE #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE @@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, - VOID *sendMsg, - INT32 sendLen, - UINT32 sendFlag, - const mmSockAddr* addr, - INT32 tolen); + VOID *sendMsg, + INT32 sendLen, + UINT32 sendFlag, + const mmSockAddr* addr, + INT32 tolen); MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, - VOID *recvBuf, - mmSize recvLen, - UINT32 recvFlag, - mmSockAddr* addr, - mmSocklen_t *FromLen); + VOID *recvBuf, + mmSize recvLen, + UINT32 recvFlag, + mmSockAddr* addr, + mmSocklen_t *FromLen); MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); @@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); -MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); +MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, + mmUserBlock_t *timerBlock, + UINT milliSecond, + UINT period); MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); @@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); // Poll related interface MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); -MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, - pmmPollData polledData, mmPollBack pollBack); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, + INT32 fdCount, + INT32 timeout, + mmCompletionHandle handleIOCP, + pmmPollData polledData, + mmPollBack pollBack); MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); @@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); -MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts, - INT32 *longIndex); +MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, + char *const *argv, + const char *opts, + const mmStructOption *longOpts, + INT32 *longIndex); MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); @@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); -MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, - mmProcess *id); - -MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, - const mmThreadAttr *threadAttr); +MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, + const mmArgvEnv *env, + const char *stdoutRedirectFile, + mmProcess *id); + +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, + const mmUserBlock_t *funcBlock, + const mmThreadAttr *threadAttr); MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 5db6bbf8..cecdd4a7 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -237,6 +237,11 @@ typedef struct { } mmThreadAttr; typedef VOID (*mmPf)(VOID); + +#define mm_no_argument 0 +#define mm_required_argument 1 +#define mm_optional_argument 2 + #define M_FILE_RDONLY GENERIC_READ #define M_FILE_WRONLY GENERIC_WRITE #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE) @@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID); #define M_CREAT _O_CREAT #define M_BINARY _O_BINARY #define M_TRUNC _O_TRUNC +#define M_APPEND _O_APPEND #define M_IREAD _S_IREAD #define M_IRUSR _S_IREAD diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 4e735438..b9b2cbe5 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -18,6 +18,7 @@ #define __CCE_RUNTIME_BASE_H__ #include +#include "toolchain/prof_callback.h" #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { @@ -32,309 +33,8 @@ extern "C" { #endif #endif -/** - * @ingroup dvrt_base - * @brief runtime error numbers. - */ -typedef enum tagRtError { - RT_ERROR_NONE = 0x0, // success - - RT_ERROR_DEVICE_BASE = 0x07010000, - RT_ERROR_DEVICE_NULL, - RT_ERROR_DEVICE_NEW, - RT_ERROR_DEVICE_ID, - RT_ERROR_DEVICE_CHIPTYPE, - RT_ERROR_DEVICE_DEPLOY, - RT_ERROR_DEVICE_RETAIN, - RT_ERROR_DEVICE_PLATFORM, - RT_ERROR_DEVICE_LOADER, - RT_ERROR_DEVICE_LIMIT, - RT_ERROR_DEVICE_PROC_HANG_OUT, - RT_ERROR_DEVICE_POWER_UP_FAIL, - RT_ERROR_DEVICE_POWER_DOWN_FAIL, - RT_ERROR_DEVICE_INVALID, - - RT_ERROR_DRV_BASE = 0x07020000, - RT_ERROR_DRV_NULL, - RT_ERROR_DRV_NEW, - RT_ERROR_DRV_MEMORY, - RT_ERROR_DRV_INPUT, - RT_ERROR_DRV_PTRNULL, - RT_ERROR_DRV_OPEN_AICPU, - RT_ERROR_DRV_CLOSE_AICPU, - RT_ERROR_DRV_SYM_AICPU, - RT_ERROR_DRV_OPEN_TSD, - RT_ERROR_DRV_CLOSE_TSD, - RT_ERROR_DRV_SYM_TSD, - RT_ERROR_DRV_SOURCE, - RT_ERROR_DRV_REPORT, - RT_ERROR_DRV_COMMAND, - RT_ERROR_DRV_OCCUPY, - RT_ERROR_DRV_ERR, - - RT_ERROR_STREAM_BASE = 0x07030000, - RT_ERROR_STREAM_NULL, - RT_ERROR_STREAM_NEW, - RT_ERROR_STREAM_CONTEXT, - RT_ERROR_STREAM_INVALID, - RT_ERROR_STREAM_MODEL, - RT_ERROR_STREAM_FUSION, - RT_ERROR_STREAM_FULL, - RT_ERROR_STREAM_EMPTY, - RT_ERROR_STREAM_NOT_COMPLETE, - RT_ERROR_STREAM_SYNC, - RT_ERROR_STREAM_NO_CB_REG, - RT_ERROR_STREAM_DUPLICATE, - RT_ERROR_STREAM_NOT_EXIST, - RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE, - RT_ERROR_SQID_FULL, - - RT_ERROR_MODEL_BASE = 0x07040000, - RT_ERROR_MODEL_NULL, - RT_ERROR_MODEL_NEW, - RT_ERROR_MODEL_CONTEXT, - RT_ERROR_MODEL_ENDGRAPH, - RT_ERROR_MODEL_STREAM, - RT_ERROR_MODEL_EXCUTOR, - RT_ERROR_MODEL_SETUP, - RT_ERROR_MODEL_ID, - RT_ERROR_MODEL_EXE_FAILED, - RT_ERROR_END_OF_SEQUENCE, // end of sequence - RT_ERROR_MODEL_EXIT, - RT_ERROR_MODEL_EXIT_STREAM_UNBIND, - RT_ERROR_MODEL_EXIT_ID, - RT_ERROR_MODEL_ABORT_NORMAL, - - RT_ERROR_EVENT_BASE = 0x07050000, - RT_ERROR_EVENT_NULL, - RT_ERROR_EVENT_NEW, - RT_ERROR_EVENT_RECORDER_NULL, - RT_ERROR_EVENT_TIMESTAMP_INVALID, - RT_ERROR_EVENT_TIMESTAMP_REVERSAL, - RT_ERROR_EVENT_NOT_COMPLETE, - - RT_ERROR_NOTIFY_BASE = 0x07060000, - RT_ERROR_NOTIFY_NULL, - RT_ERROR_NOTIFY_NEW, - RT_ERROR_NOTIFY_TYPE, - RT_ERROR_NOTIFY_NOT_COMPLETE, - - RT_ERROR_CONTEXT_BASE = 0x07070000, - RT_ERROR_CONTEXT_NULL, - RT_ERROR_CONTEXT_NEW, - RT_ERROR_CONTEXT_DEL, - RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL, - RT_ERROR_CONTEXT_ONLINE_STREAM_NULL, - - RT_ERROR_KERNEL_BASE = 0x07080000, - RT_ERROR_KERNEL_NULL, - RT_ERROR_KERNEL_NEW, - RT_ERROR_KERNEL_LOOKUP, - RT_ERROR_KERNEL_NAME, - RT_ERROR_KERNEL_TYPE, - RT_ERROR_KERNEL_OFFSET, - RT_ERROR_KERNEL_DUPLICATE, - RT_ERROR_KERNEL_UNREGISTERING, - - RT_ERROR_PROGRAM_BASE = 0x07090000, - RT_ERROR_PROGRAM_NULL, - RT_ERROR_PROGRAM_NEW, - RT_ERROR_PROGRAM_DATA, - RT_ERROR_PROGRAM_SIZE, - RT_ERROR_PROGRAM_MEM_TYPE, - RT_ERROR_PROGRAM_MACHINE_TYPE, - RT_ERROR_PROGRAM_USEOUT, - - RT_ERROR_MODULE_BASE = 0x070a0000, - RT_ERROR_MODULE_NULL, - RT_ERROR_MODULE_NEW, - - RT_ERROR_INSTANCE_BASE = 0x070b0000, - RT_ERROR_INSTANCE_NULL, - RT_ERROR_INSTANCE_NEW, - RT_ERROR_INSTANCE_VERSION, - - RT_ERROR_API_BASE = 0x070c0000, - RT_ERROR_API_NULL, - RT_ERROR_API_NEW, - - RT_ERROR_DATADUMP_BASE = 0x070d0000, - RT_ERROR_DATADUMP_NULL, - RT_ERROR_DATADUMP_NEW, - RT_ERROR_DATADUMP_TIME, - RT_ERROR_DATADUMP_FILE, - RT_ERROR_DATADUMP_ADDRESS, - RT_ERROR_DATADUMP_LOAD_FAILED, - RT_ERROR_DUMP_ADDR_SET_FAILED, - - RT_ERROR_PROF_BASE = 0x070e0000, - RT_ERROR_PROF_NULL, - RT_ERROR_PROF_NEW, - RT_ERROR_PROF_START, - RT_ERROR_PROF_DEVICE_MEM, - RT_ERROR_PROF_HOST_MEM, - RT_ERROR_PROF_SET_DIR, - RT_ERROR_PROF_OPER, - RT_ERROR_PROF_FULL, - RT_ERROR_PROF_NAME, - - RT_ERROR_PCTRACE_BASE = 0x070f0000, - RT_ERROR_PCTRACE_NULL, - RT_ERROR_PCTRACE_NEW, - RT_ERROR_PCTRACE_TIME, - RT_ERROR_PCTRACE_FILE, - - RT_ERROR_TASK_BASE = 0x07100000, - RT_ERROR_TASK_NULL, - RT_ERROR_TASK_NEW, - RT_ERROR_TASK_TYPE, - RT_ERROR_TASK_ALLOCATOR, - - RT_ERROR_COMMON_BASE = 0x07110000, - RT_ERROR_INVALID_VALUE, // RT_ERROR_INPUT_INVALID - RT_ERROR_MEMORY_ADDRESS_UNALIGNED, - RT_ERROR_SEC_HANDLE, - RT_ERROR_OS_HANDLE, - RT_ERROR_MUTEX_LOCK, - RT_ERROR_MUTEX_UNLOCK, - RT_ERROR_CALLOC, - RT_ERROR_POOL_RESOURCE, - RT_ERROR_TRANS_ARGS, - RT_ERROR_METADATA, - RT_ERROR_LOST_HEARTBEAT, - RT_ERROR_REPORT_TIMEOUT, - RT_ERROR_FEATURE_NOT_SUPPROT, - RT_ERROR_MEMORY_ALLOCATION, - RT_ERROR_MEMORY_FREE, - RT_ERROR_INVALID_MEMORY_TYPE, - - RT_ERROR_DEBUG_BASE = 0x07120000, - RT_ERROR_DEBUG_NULL, - RT_ERROR_DEBUG_NEW, - RT_ERROR_DEBUG_SIGNAL, - RT_ERROR_DEBUG_OPEN, - RT_ERROR_DEBUG_WRITE, - RT_ERROR_DEBUG_REGISTER_FAILED, - RT_ERROR_DEBUG_UNREGISTER_FAILED, - - RT_ERROR_ENGINE_BASE = 0x07130000, - RT_ERROR_ENGINE_NULL, - RT_ERROR_ENGINE_NEW, - RT_ERROR_ENGINE_THREAD, - - RT_ERROR_LABEL_BASE = 0x07140000, - RT_ERROR_LABEL_NULL, - RT_ERROR_LABEL_NEW, - RT_ERROR_LABEL_CONTEXT, - RT_ERROR_LABEL_STREAM, - RT_ERROR_LABEL_MODEL, - RT_ERROR_LABEL_ALLOCATOR, - RT_ERROR_LABEL_FREE, - RT_ERROR_LABEL_SET, - RT_ERROR_LABEL_ID, - - RT_ERROR_TSFW_BASE = 0x07150000, - RT_ERROR_TSFW_UNKNOWN, - RT_ERROR_TSFW_NULL_PTR, - RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID, - RT_ERROR_TSFW_ILLEGAL_PARAM, - RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL, - RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY, - RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL, - RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY, - RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED, - RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED, - RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE, - RT_ERROR_TSFW_L2_MALLOC_FAILED, - RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED, - RT_ERROR_TSFW_MEMCPY_OP_FAILED, - RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED, - RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL, - RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY, - RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED, - RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE, - RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED, - RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND, - RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED, - RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED, - RT_ERROR_TSFW_SQNODE_NOT_ENOUGH, - RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE, - RT_ERROR_TSFW_CQ_REPORT_FAILED, - RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS, - RT_ERROR_TSFW_SYS_DMA_RESET_FAILED, - RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED, - RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED, - RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL, - RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY, - RT_ERROR_TSFW_TIMER_EVENT_FULL, - RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH, - RT_ERROR_TSFW_AICORE_TIMEOUT, - RT_ERROR_TSFW_AICORE_EXCEPTION, - RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION, - RT_ERROR_TSFW_AICPU_TIMEOUT, - RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL, - RT_ERROR_TSFW_AICPU_EXCEPTION, - RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR, - RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR, - RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM, - RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT, - RT_ERROR_TSFW_DEBUG_INVALID_SQCQ, - RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE, - RT_ERROR_TSFW_DEBUG_CMD_PROCESS, - RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS, - RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS, - RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS, - RT_ERROR_TSFW_DEBUG_TASK_EMPTY, - RT_ERROR_TSFW_DEBUG_TASK_FULL, - RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST, - RT_ERROR_TSFW_DEBUG_AI_CORE_FULL, - RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST, - RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION, - RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT, - RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL, - RT_ERROR_TSFW_DEBUG_READ_ERROR, - RT_ERROR_TSFW_DEBUG_WRITE_FAIL, - RT_ERROR_TSFW_QUEUE_FULL, - RT_ERROR_TSFW_QUEUE_EMPTY, - RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL, - RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH, - RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE, - RT_ERROR_TSFW_INVLD_CPY_DIR, - RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES, - RT_ERROR_TSFW_PCIE_DMA_CPY_ERR, - RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY, - RT_ERROR_TSFW_PROFILE_BUFF_FULL, - RT_ERROR_TSFW_PROFILE_MODE_CONFLICT, - RT_ERROR_TSFW_PROFILE_OTHER_PID_ON, - RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED, - RT_ERROR_TSFW_TSCPU_CLOSE_FAILED, - RT_ERROR_TSFW_EXPECT_FAIL, - RT_ERROR_TSFW_REPEAT_MODEL_STREAM, - RT_ERROR_TSFW_STREAM_MODEL_UNBIND, - RT_ERROR_TSFW_MODEL_EXE_FAILED, - RT_ERROR_TSFW_IPC_SEND_FAILED, - RT_ERROR_TSFW_IPC_PROC_REG_FAILED, - RT_ERROR_TSFW_STREAM_FULL, - RT_ERROR_TSFW_END_OF_SEQUENCE, - RT_ERROR_TSFW_SWITCH_STREAM_LABEL, - RT_ERROR_TSFW_TRANS_SQE_FAIL, - RT_ERROR_TSFW_RESERVED, - - RT_ERROR_SUBSCRIBE_BASE = 0x07160000, - RT_ERROR_SUBSCRIBE_NULL, - RT_ERROR_SUBSCRIBE_NEW, - RT_ERROR_SUBSCRIBE_STREAM, - RT_ERROR_SUBSCRIBE_THREAD, - RT_ERROR_SUBSCRIBE_GROUP, - - RT_ERROR_GROUP_BASE = 0x07170000, - RT_ERROR_GROUP_NOT_SET, - RT_ERROR_GROUP_NOT_CREATE, - - RT_ERROR_RESERVED = 0x07ff0000, - }rtError_t; +typedef int32_t rtError_t; +static const int32_t RT_ERROR_NONE = 0; // success /** * @ingroup dvrt_base @@ -387,10 +87,20 @@ typedef struct rtExceptionInfo { uint32_t deviceid; } rtExceptionInfo; +typedef struct rtTaskFailInfo { + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; +} rtTaskFailInfo; + typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); +typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); + typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* */ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream); +/** + * @ingroup profiling_base + * @brief ts set profiling reporter callback. + */ +RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback); + /** * @ingroup dvrt_base * @brief Returns the last error from a runtime call. @@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); */ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback); +/** + * @ingroup dvrt_base + * @brief register callback for fail task + * @param [in] uniName unique register name, can't be null + * @param [in] callback fail task callback function + * @param [out] NA + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); + /** * @ingroup dvrt_base * @brief notify handle. diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index f1a70eaa..12a407d7 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig { typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; -/** - * @ingroup - * @brief get platform - * @param [in] platForm - * @return platForm - */ -RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm); - /** * @ingroup * @brief get AI core count @@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate */ RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); -/** - * @ingroup - * @brief set platform in gen ctx - * @param [in] platForm - * @return RT_ERROR_NONE for ok, errno for failed - */ -RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); /** * @ingroup @@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); */ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); +/** + * @ingroup + * @brief get runtime version. The version is returned as (1000 major + 10 minor). For example, RUNTIME 9.2 would be represented by 9020. + * @param [out] runtimeVersion + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index b378e3b0..d1a91a9b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 FEATURE_TYPE_MEMCPY = 0, FEATURE_TYPE_RSV, } rtFeatureType_t; - * @param [in] infoType info type + * @param [in] featureInfo info type typedef enum tagMemcpyInfo { MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, MEMCPY_INFO _RSV, } rtMemcpyInfo_t; - * @param [out] value the capability info + * @param [out] value the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index d3d5956f..83cafa3c 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -28,4 +28,4 @@ #include "rt_model.h" #include "stream.h" -#endif // __CCE_RUNTIME_RT_H__ \ No newline at end of file +#endif // __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index d30564b8..d5050f35 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t; typedef uint32_t TDT_StatusT; #endif +#define LINUX 0 +#define WINDOWS 1 + #ifndef TDT_LIB_EXPORT +#if(TARGET_SYSTEM_NAME == WINDOWS) +#define TDT_LIB_EXPORT __declspec(dllexport) +#else #define TDT_LIB_EXPORT __attribute__((visibility("default"))) #endif +#endif /** * @ingroup tdt status. * diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 6066a12e..665c8b82 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -23,6 +23,7 @@ #include #include "tdt/status.h" #include "tdt/data_common.h" +#include "toolchain/prof_callback.h" #ifdef __cplusplus extern "C" { @@ -37,7 +38,7 @@ extern "C" { * Used for the Framework process to communicate with the TSDDaemon process, * and notify TSD to complete the initialization of other processes * -* @param phyDeviceId [IN] type #unsigned int. Physical device ID +* @param logicDeviceId [IN] type #unsigned int. Logic device ID * @param rankSize [IN] type #unsigned int. The rankSize of the training. * The default value is 1. When rankSize is greater than 1, * HCCP will be pulled to perform set communication related operations. @@ -49,7 +50,7 @@ extern "C" { * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize); +TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); /** * @ingroup Close @@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); +TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); /** * @ingroup UpdateProfilingMode @@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId); * @li tsd_client.h: Header file where the interface declaration is located. * @li data_common.h: Header file where 'TDT_StatusT' defined */ -TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag); +TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); + +/** +* @ingroup TsdSetMsprofReporterCallback +* @brief 用于推理场景下设置aicpu的profilng的callback函数 +* +* @par Function +* 设置offline模式下aicpu_sd进程的profiling的callback函数 +* +* @param callback [IN] type #MsprofReporterCallback. 回调函数 +* @retval TDT_OK Success +* @retval OtherValues Failure +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tsd_client.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'TDT_StatusT' defined +* @li prof_callback.h: Headerfile where 'MsprofReporterCallback' defined +*/ +TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); /** * @ingroup CreateCmdParameterObj diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h new file mode 100644 index 00000000..3fad74bc --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -0,0 +1,135 @@ +/** + * Copyright 2020-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @file prof_callback.h + * @brief declaraion of profiling callbacks + */ + +#ifndef MSPROFILER_PROF_CALLBACK_H_ +#define MSPROFILER_PROF_CALLBACK_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +#include "stddef.h" +#include "stdint.h" + +/** + * @name MsprofErrorCode + * @brief error code + */ +enum MsprofErrorCode { + MSPROF_ERROR_NONE = 0, + MSPROF_ERROR_MEM_NOT_ENOUGH, + MSPROF_ERROR_GET_ENV, + MSPROF_ERROR_CONFIG_INVALID, + MSPROF_ERROR_ACL_JSON_OFF, + MSPROF_ERROR, +}; + +#define MSPROF_ENGINE_MAX_TAG_LEN (31) + +/** + * @name ReporterData + * @brief struct of data to report + */ +struct ReporterData { + char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; // the sub-type of the module, data with different tag will be writen + int deviceId; // the index of device + size_t dataLen; // the length of send data + unsigned char *data; // the data content +}; + +/** + * @name MsprofReporterModuleId + * @brief module id of data to report + */ +enum MsprofReporterModuleId { + MSPROF_MODULE_DATA_PREPROCESS = 0, // DATA_PREPROCESS + MSPROF_MODULE_HCCL, // HCCL + MSPROF_MODULE_ACL, // AclModule + MSPROF_MODULE_FRAMEWORK, // Framework + MSPROF_MODULE_RUNTIME // runtime +}; + +/** + * @name MsprofReporterCallbackType + * @brief reporter callback request type + */ +enum MsprofReporterCallbackType { + MSPROF_REPORTER_REPORT = 0, // report data + MSPROF_REPORTER_INIT, // init reporter + MSPROF_REPORTER_UNINIT, // uninit reporter +}; + +/** + * @name MsprofReporterCallback + * @brief callback to start reporter/stop reporter/report date + * @param moduleId [IN] enum MsprofReporterModuleId + * @param type [IN] enum MsprofReporterCallbackType + * @param data [IN] callback data (nullptr on INTI/UNINIT) + * @param len [IN] callback data size (0 on INIT/UNINIT) + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len); + + +#define MSPROF_OPTIONS_DEF_LEN_MAX (2048) + +/** + * @name MsprofGeOptions + * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS + */ +struct MsprofGeOptions { + char jobId[MSPROF_OPTIONS_DEF_LEN_MAX]; + char options[MSPROF_OPTIONS_DEF_LEN_MAX]; +}; + +/** + * @name MsprofCtrlCallbackType + * @brief ctrl callback request type + */ +enum MsprofCtrlCallbackType { + MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env + MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json + MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options + MSPROF_CTRL_FINALIZE // stop profiling +}; + +/** + * @name MsprofCtrlCallback + * @brief callback to start/stop profiling + * @param type [IN] enum MsprofCtrlCallbackType + * @param data [IN] callback data + * @param len [IN] callback data size + * @return enum MsprofErrorCode + */ +typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len); + +/** + * @name MsprofSetDeviceCallback + * @brief callback to notify set/reset device + * @param devId [IN] device id + * @param isOpenDevice [IN] true: set device, false: reset device + */ +typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice); + +#ifdef __cplusplus +} +#endif + +#endif // MSPROFILER_PROF_CALLBACK_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index c734380c..ff91351b 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -16,7 +16,17 @@ #ifndef MSPROF_ENGINE_PROF_REPORTER_H_ #define MSPROF_ENGINE_PROF_REPORTER_H_ +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else #define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + +#include "prof_callback.h" /** * @file prof_reporter.h @@ -25,20 +35,6 @@ */ namespace Msprof { namespace Engine { -/// the max tag length -#define MSPROF_ENGINE_MAX_TAG_LEN (31) -/** - * @ingroup reporter - * @brief struct ReporterData - * the sturct of the data send to libmsprof - */ -struct ReporterData { - char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1]; ///< the sub-type of the module, data with different tag will be writen - int deviceId; ///< the physical id of device - size_t dataLen; ///< the length of send data - unsigned char *data; ///< the data content -}; - /** * @ingroup reporter * @brief class Reporter @@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter { } // namespace Engine } // namespace Msprof -#endif // MSPROF_ENGINE_PROF_REPORTER_H_ \ No newline at end of file +#endif // MSPROF_ENGINE_PROF_REPORTER_H_ diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index bce58f32..5faca0ae 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,7 +18,9 @@ #define D_SYSLOG_H_ #ifdef __cplusplus +#ifndef LOG_CPP extern "C" { +#endif #endif // __cplusplus #ifndef LINUX @@ -105,6 +107,7 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -116,6 +119,18 @@ typedef struct tagKV { char *value; } KeyValue; +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + /** * @ingroup slog * @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + /** * @ingroup slog * @brief dlog_error: print error log @@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus +#ifndef LOG_CPP } +#endif // LOG_CPP #endif // __cplusplus #endif // D_SYSLOG_H_