@@ -174,11 +174,9 @@ echo "---------------- GraphEngine output generated ----------------" | |||
# generate output package in tar form, including ut/st libraries/executables | |||
cd ${BASEPATH} | |||
mkdir -p output/plugin/nnengine/ge_config/ | |||
mkdir -p output/plugin/opskernel/ | |||
find output/ -name graphengine_lib.tar -exec rm {} \; | |||
cp src/ge/engine_manager/engine_conf.json output/plugin/nnengine/ge_config/ | |||
find output/ -maxdepth 1 -name libengine.so -exec mv -f {} output/plugin/nnengine/ \; | |||
find output/ -maxdepth 1 -name libge_local_engine.so -exec mv -f {} output/plugin/opskernel/ \; | |||
tar -cf graphengine_lib.tar output/* | |||
mv -f graphengine_lib.tar output | |||
echo "---------------- GraphEngine package archive generated ----------------" |
@@ -52,16 +52,5 @@ struct GETaskInfo { | |||
std::vector<GETaskKernelHcclInfo> kernelHcclInfo; | |||
}; | |||
struct HcomOpertion { | |||
std::string hcclType; | |||
void *inputPtr; | |||
void *outputPtr; | |||
uint64_t count; | |||
int32_t dataType; | |||
int32_t opType; | |||
int32_t root; | |||
}; | |||
} // namespace ge | |||
#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ |
@@ -28,7 +28,6 @@ struct CompressConfig { | |||
size_t channel; // channels of L2 or DDR. For load balance | |||
size_t fractalSize; // size of compressing block | |||
bool isTight; // whether compose compressed data tightly | |||
size_t init_offset; | |||
}; | |||
CmpStatus CompressWeights(char* input, const CompressConfig& compressConfig, char* indexs, char* output, | |||
@@ -1,33 +0,0 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef COMPRESS_WEIGHT_H | |||
#define COMPRESS_WEIGHT_H | |||
#include "compress.h" | |||
const int SHAPE_SIZE_WEIGHT = 4; | |||
struct CompressOpConfig { | |||
int64_t wShape[SHAPE_SIZE_WEIGHT]; | |||
size_t compressTilingK; | |||
size_t compressTilingN; | |||
struct CompressConfig compressConfig; | |||
}; | |||
extern "C" CmpStatus CompressWeightsConv2D(const char *const input, char *const zipBuffer, char *const infoBuffer, | |||
CompressOpConfig *const param); | |||
#endif // COMPRESS_WEIGHT_H |
@@ -27,6 +27,7 @@ using std::string; | |||
using std::vector; | |||
namespace fe { | |||
class PlatformInfoManager { | |||
public: | |||
PlatformInfoManager(const PlatformInfoManager &) = delete; | |||
@@ -38,8 +39,6 @@ class PlatformInfoManager { | |||
uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo); | |||
uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo); | |||
void SetOptionalCompilationInfo(OptionalInfo &optiCompilationInfo); | |||
private: | |||
@@ -95,5 +94,6 @@ class PlatformInfoManager { | |||
map<string, PlatformInfo> platformInfoMap_; | |||
OptionalInfo optiCompilationInfo_; | |||
}; | |||
} // namespace fe | |||
#endif |
@@ -44,12 +44,8 @@ const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump"; | |||
const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; | |||
const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; | |||
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | |||
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | |||
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | |||
const char *const OPTION_EXEC_OP_DEBUG_LEVEL = "ge.exec.opDebugLevel"; | |||
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | |||
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | |||
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | |||
// profiling flag | |||
const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; | |||
const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; | |||
@@ -223,10 +219,6 @@ const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; | |||
// Configure input fp16 nodes | |||
const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | |||
// Configure debug level, its value should be 0(default), 1 or 2. | |||
// 0: close debug; 1: open TBE compiler; 2: open ccec compiler | |||
const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel"; | |||
// Graph run mode | |||
enum GraphRunMode { PREDICTION = 0, TRAIN }; | |||
@@ -145,8 +145,7 @@ enum Format { | |||
FORMAT_FRACTAL_ZN_LSTM, | |||
FORMAT_FRACTAL_Z_G, | |||
FORMAT_RESERVED, | |||
FORMAT_ALL, | |||
FORMAT_NULL | |||
FORMAT_ALL | |||
}; | |||
// for unknown shape op type | |||
@@ -98,8 +98,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { | |||
OpRegistrationData &DelInputWithOriginalType(int input_idx, const std::string &ori_type); | |||
OpRegistrationData &InputReorderVector(const vector<int> &input_order); | |||
domi::ImplyType GetImplyType() const; | |||
std::string GetOmOptype() const; | |||
std::set<std::string> GetOriginOpTypeSet() const; | |||
@@ -51,6 +51,30 @@ inline pid_t GetTid() { | |||
return tid; | |||
} | |||
#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap() | |||
#define GE_TIMESTAMP_END(stage, stage_name) \ | |||
do { \ | |||
uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \ | |||
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ | |||
(endUsec_##stage - startUsec_##stage)); \ | |||
} while (0); | |||
#define GE_TIMESTAMP_CALLNUM_START(stage) \ | |||
uint64_t startUsec_##stage = ge::GetCurrentTimestap(); \ | |||
uint64_t call_num_of##stage = 0; \ | |||
uint64_t time_of##stage = 0 | |||
#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestap()) | |||
#define GE_TIMESTAMP_ADD(stage) \ | |||
time_of##stage += ge::GetCurrentTimestap() - startUsec_##stage; \ | |||
call_num_of##stage++ | |||
#define GE_TIMESTAMP_CALLNUM_END(stage, stage_name) \ | |||
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \ | |||
call_num_of##stage) | |||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GetTid(), __FUNCTION__, ERROR_CODE, \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | |||
@@ -19,12 +19,15 @@ | |||
#include <string> | |||
#include "runtime/rt.h" | |||
#include "cce/cce_def.hpp" | |||
#include "common/string_util.h" | |||
#include "common/util.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "ge/ge_api_error_codes.h" | |||
using cce::CC_STATUS_SUCCESS; | |||
using cce::ccStatus_t; | |||
#if !defined(__ANDROID__) && !defined(ANDROID) | |||
#define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__) | |||
#else | |||
@@ -99,13 +102,17 @@ | |||
} while (0); | |||
// If expr is not true, print the log and return the specified status | |||
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ | |||
do { \ | |||
bool b = (expr); \ | |||
if (!b) { \ | |||
GELOGE(_status, __VA_ARGS__); \ | |||
return _status; \ | |||
} \ | |||
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ | |||
do { \ | |||
bool b = (expr); \ | |||
if (!b) { \ | |||
std::string msg; \ | |||
(void)msg.append(ge::StringUtils::FormatString(__VA_ARGS__)); \ | |||
(void)msg.append( \ | |||
ge::StringUtils::FormatString(" Error Code:0x%X(%s)", _status, GET_ERRORNO_STR(_status).c_str())); \ | |||
DOMI_LOGE("%s", msg.c_str()); \ | |||
return _status; \ | |||
} \ | |||
} while (0); | |||
// If expr is not true, print the log and return the specified status | |||
@@ -125,7 +132,7 @@ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is not true, print the log and execute a custom statement | |||
#define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \ | |||
@@ -135,7 +142,7 @@ | |||
GELOGW(__VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is not true, print the log and execute a custom statement | |||
#define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \ | |||
{ \ | |||
@@ -144,7 +151,7 @@ | |||
GELOGI(__VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is not true, print the log and execute a custom statement | |||
#define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ | |||
@@ -154,7 +161,7 @@ | |||
GELOGI(__VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is true, print logs and execute custom statements | |||
#define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \ | |||
@@ -164,7 +171,7 @@ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is true, print the Information log and execute a custom statement | |||
#define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \ | |||
{ \ | |||
@@ -173,7 +180,7 @@ | |||
GELOGI(__VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is not SUCCESS, print the log and execute the expression + return | |||
#define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ | |||
@@ -184,7 +191,7 @@ | |||
exec_expr; \ | |||
return; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is not SUCCESS, print the log and execute the expression + return _status | |||
#define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \ | |||
@@ -195,7 +202,7 @@ | |||
exec_expr; \ | |||
return _status; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is not true, execute a custom statement | |||
#define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ | |||
@@ -204,7 +211,7 @@ | |||
if (!b) { \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// -----------------runtime related macro definitions------------------------------- | |||
// If expr is not RT_ERROR_NONE, print the log | |||
@@ -224,7 +231,7 @@ | |||
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If expr is not RT_ERROR_NONE, print the log and return | |||
#define GE_CHK_RT_RET(expr) \ | |||
@@ -236,13 +243,23 @@ | |||
} \ | |||
} while (0); | |||
// ------------------------cce related macro definitions---------------------------- | |||
// If expr is not CC_STATUS_SUCCESS, print the log | |||
#define GE_CHK_CCE(expr) \ | |||
do { \ | |||
ccStatus_t _cc_ret = (expr); \ | |||
if (_cc_ret != CC_STATUS_SUCCESS) { \ | |||
DOMI_LOGE("Call cce api failed, ret: 0x%X", _cc_ret); \ | |||
} \ | |||
} while (0); | |||
// If expr is true, execute exec_expr without printing logs | |||
#define GE_IF_BOOL_EXEC(expr, exec_expr) \ | |||
{ \ | |||
if (expr) { \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
}; | |||
// If make_shared is abnormal, print the log and execute the statement | |||
#define GE_MAKE_SHARED(exec_expr0, exec_expr1) \ | |||
@@ -54,9 +54,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | |||
struct DataBuffer { | |||
public: | |||
void *data; // Data address | |||
uint64_t length; // Data length | |||
uint32_t length; // Data length | |||
bool isDataSupportMemShare = false; | |||
DataBuffer(void *dataIn, uint64_t len, bool isSupportMemShare) | |||
DataBuffer(void *dataIn, uint32_t len, bool isSupportMemShare) | |||
: data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare) {} | |||
DataBuffer() : data(nullptr), length(0), isDataSupportMemShare(false) {} | |||
@@ -106,7 +106,7 @@ struct ShapeDescription { | |||
// Definition of input and output description information | |||
struct InputOutputDescInfo { | |||
std::string name; | |||
uint64_t size; | |||
uint32_t size; | |||
uint32_t data_type; | |||
ShapeDescription shape_info; | |||
}; | |||
@@ -231,7 +231,6 @@ struct Options { | |||
// Profiling info of task | |||
struct TaskDescInfo { | |||
std::string model_name; | |||
std::string op_name; | |||
uint32_t block_dim; | |||
uint32_t task_id; | |||
@@ -240,7 +239,6 @@ struct TaskDescInfo { | |||
// Profiling info of graph | |||
struct ComputeGraphDescInfo { | |||
std::string model_name; | |||
std::string op_name; | |||
std::string op_type; | |||
std::vector<Format> input_format; | |||
@@ -44,6 +44,8 @@ class ModelHelper { | |||
void SetSaveMode(bool val) { is_offline_ = val; } | |||
bool GetSaveMode(void) const { return is_offline_; } | |||
static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model); | |||
static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr); | |||
Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name); | |||
Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name); | |||
@@ -48,9 +48,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_S | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_LAYER; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_FILE_PATH; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_MODE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_AICORE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ATOMIC; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL; | |||
// Supported public properties name | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_START_TIME; // Start time | |||
@@ -338,7 +335,6 @@ REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell"); | |||
REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); | |||
REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); | |||
REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") | |||
REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); | |||
// ANN dedicated operator | |||
REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); | |||
@@ -635,9 +631,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_N | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_END_GRAPH; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_OP_DEBUG; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_OP_DEBUG; | |||
// convolution node type | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_CONVOLUTION; | |||
// adds a convolutional node name for the hard AIPP | |||
@@ -21,12 +21,12 @@ | |||
#include <string> | |||
#include <vector> | |||
#include "common/dynamic_aipp.h" | |||
#include "common/ge_inner_error_codes.h" | |||
#include "common/ge_types.h" | |||
#include "common/types.h" | |||
#include "graph/tensor.h" | |||
#include "runtime/base.h" | |||
#include "common/dynamic_aipp.h" | |||
namespace ge { | |||
class ModelListenerAdapter; | |||
@@ -27,7 +27,6 @@ | |||
#include "graph/ge_tensor.h" | |||
#include "graph/graph.h" | |||
#include "graph/op_desc.h" | |||
#include "graph/detail/attributes_holder.h" | |||
namespace ge { | |||
class GeGenerator { | |||
@@ -106,6 +106,7 @@ void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &ou | |||
void UpdateOmgCtxWithParserCtx(); | |||
void UpdateParserCtxWithOmgCtx(); | |||
} // namespace ge | |||
namespace domi { | |||
@@ -74,9 +74,6 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A | |||
size_t GetAllNodesSize() const; | |||
Vistor<NodePtr> GetAllNodes() const; | |||
// is_unknown_shape: false, same with GetAllNodes func | |||
// is_unknown_shape: true, same with GetDirectNodes func | |||
Vistor<NodePtr> GetNodes(bool is_unknown_shape) const; | |||
size_t GetDirectNodesSize() const; | |||
Vistor<NodePtr> GetDirectNode() const; | |||
Vistor<NodePtr> GetInputNodes() const; | |||
@@ -177,10 +174,6 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A | |||
void SetInputSize(uint32_t size) { input_size_ = size; } | |||
uint32_t GetInputSize() const { return input_size_; } | |||
// false: known shape true: unknow shape | |||
bool GetGraphUnknownFlag() const { return is_unknown_shape_graph_; } | |||
void SetGraphUnknownFlag(bool flag) { is_unknown_shape_graph_ = flag; } | |||
/// | |||
/// Set is need train iteration. | |||
/// If set true, it means this graph need to be run iteration some | |||
@@ -289,8 +282,7 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A | |||
std::map<uint32_t, std::string> op_name_map_; | |||
uint64_t session_id_ = 0; | |||
ge::Format data_format_ = ge::FORMAT_ND; | |||
// unknown graph indicator, default is false, mean known shape | |||
bool is_unknown_shape_graph_ = false; | |||
}; | |||
} // namespace ge | |||
#endif // INC_GRAPH_COMPUTE_GRAPH_H_ |
@@ -778,10 +778,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MOD | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_CORE_TYPE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_ATC_VERSION; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_OPP_VERSION; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_MODE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_VALUE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_OFFSET; | |||
@@ -1000,7 +996,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_FORMAT; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_DATA_TYPE; | |||
// used for lX fusion | |||
// used for l1 fusion and other fusion in future | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_ID; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_KEY; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_GROUP_KEY; | |||
@@ -1014,17 +1010,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L1_FUSION; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_N_BATCH_SPILT; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NO_TASK_AND_DUMP_NEEDED; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_DUMP_REF; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L2_FUSION_GROUP_ID; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L2_FUSION; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_FLAG; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE; | |||
// op overflow dump | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; | |||
// functional ops attr | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH; | |||
@@ -1070,13 +1058,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_HOR | |||
// for gradient group | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_GROUP; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_FLAG; | |||
// dynamic shape attrs | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX; | |||
// for fusion op plugin | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; | |||
} // namespace ge | |||
#endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ |
@@ -149,4 +149,5 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AttrHolder { | |||
AnyMap extAttrs_; | |||
}; | |||
} // namespace ge | |||
#endif // INC_GRAPH_DETAIL_ATTRIBUTES_HOLDER_H_ |
@@ -28,7 +28,6 @@ class GEContext { | |||
uint32_t DeviceId(); | |||
uint64_t TraceId(); | |||
void Init(); | |||
void SetSessionId(uint64_t session_id); | |||
void SetCtxDeviceId(uint32_t device_id); | |||
private: | |||
@@ -25,7 +25,6 @@ | |||
#include "graph/buffer.h" | |||
#include "graph/ge_error_codes.h" | |||
#include "graph/types.h" | |||
namespace ge { | |||
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeShape { | |||
public: | |||
@@ -109,11 +108,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc : public AttrH | |||
DataType GetDataType() const; | |||
void SetDataType(DataType dt); | |||
DataType GetOriginDataType() const; | |||
void SetOriginDataType(DataType originDataType); | |||
std::vector<uint32_t> GetRefPortIndex() const; | |||
void SetRefPortByIndex(const std::vector<uint32_t> &index); | |||
DataType GetOriginDataType() const; | |||
GeTensorDesc Clone() const; | |||
GeTensorDesc &operator=(const GeTensorDesc &desc); | |||
@@ -190,4 +186,5 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensor { | |||
GeTensorDesc &DescReference() const; | |||
}; | |||
} // namespace ge | |||
#endif // INC_GRAPH_GE_TENSOR_H_ |
@@ -49,4 +49,5 @@ class ModelSerialize { | |||
friend class GraphDebugImp; | |||
}; | |||
} // namespace ge | |||
#endif // INC_GRAPH_MODEL_SERIALIZE_H_ |
@@ -105,8 +105,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||
GeTensorDescPtr MutableInputDesc(uint32_t index) const; | |||
GeTensorDescPtr MutableInputDesc(const string &name) const; | |||
Vistor<GeTensorDesc> GetAllInputsDesc() const; | |||
Vistor<GeTensorDescPtr> GetAllInputsDescPtr() const; | |||
@@ -129,8 +127,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||
GeTensorDescPtr MutableOutputDesc(uint32_t index) const; | |||
GeTensorDescPtr MutableOutputDesc(const string &name) const; | |||
uint32_t GetAllOutputsDescSize() const; | |||
Vistor<GeTensorDesc> GetAllOutputsDesc() const; | |||
@@ -60,7 +60,6 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"common/formats/formats.cc" | |||
"common/formats/utils/formats_trans_utils.cc" | |||
"common/fp16_t.cc" | |||
"common/ge/op_tiling_manager.cc" | |||
"common/ge/plugin_manager.cc" | |||
"common/helper/model_cache_helper.cc" | |||
"common/profiling/profiling_manager.cc" | |||
@@ -95,6 +94,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
"graph/load/new_model_manager/task_info/task_info.cc" | |||
"graph/load/output/output.cc" | |||
"graph/manager/*.cc" | |||
"graph/manager/model_manager/event_manager.cc" | |||
"graph/manager/util/debug.cc" | |||
@@ -159,11 +159,8 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||
"hybrid/node_executor/aicpu/aicpu_node_executor.cc" | |||
"hybrid/node_executor/compiledsubgraph/known_node_executor.cc" | |||
"hybrid/node_executor/controlop/control_op_executor.cc" | |||
"hybrid/node_executor/hccl/hccl_node_executor.cc" | |||
"hybrid/node_executor/hostcpu/ge_local_node_executor.cc" | |||
"hybrid/node_executor/node_executor.cc" | |||
"hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc" | |||
"hybrid/node_executor/task_context.cc" | |||
"init/gelib.cc" | |||
"model/ge_model.cc" | |||
@@ -207,7 +204,6 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"common/formats/formats.cc" | |||
"common/formats/utils/formats_trans_utils.cc" | |||
"common/fp16_t.cc" | |||
"common/ge/op_tiling_manager.cc" | |||
"common/ge/plugin_manager.cc" | |||
"common/helper/model_cache_helper.cc" | |||
"common/profiling/profiling_manager.cc" | |||
@@ -240,6 +236,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
"graph/load/new_model_manager/task_info/task_info.cc" | |||
"graph/load/output/output.cc" | |||
"graph/manager/*.cc" | |||
"graph/manager/model_manager/event_manager.cc" | |||
"graph/manager/util/debug.cc" | |||
@@ -28,7 +28,6 @@ | |||
#include "graph/opsproto_manager.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "graph/manager/util/rt_context_util.h" | |||
#include "graph/common/ge_call_wrapper.h" | |||
#include "register/op_registry.h" | |||
#include "common/ge/tbe_plugin_manager.h" | |||
@@ -42,8 +41,8 @@ namespace { | |||
const int32_t kMaxStrLen = 128; | |||
} | |||
static bool g_ge_initialized = false; | |||
static std::mutex g_ge_release_mutex; // GEFinalize and ~Session use | |||
static bool kGeInitialized = false; | |||
static std::mutex kGeReleaseMutex; // GEFinalize and ~Session use | |||
namespace ge { | |||
void GetOpsProtoPath(std::string &opsproto_path) { | |||
@@ -62,6 +61,31 @@ void GetOpsProtoPath(std::string &opsproto_path) { | |||
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | |||
} | |||
Status CheckDumpAndReuseMemory(const std::map<string, string> &options) { | |||
const int kDecimal = 10; | |||
auto dump_op_env = std::getenv("DUMP_OP"); | |||
int dump_op_flag = (dump_op_env != nullptr) ? std::strtol(dump_op_env, nullptr, kDecimal) : 0; | |||
auto disableReuseMemoryIter = options.find("ge.exec.disableReuseMemory"); | |||
if (disableReuseMemoryIter != options.end()) { | |||
if (disableReuseMemoryIter->second == "0") { | |||
GELOGD("ge.exec.disableReuseMemory=0, reuse memory is open"); | |||
if (dump_op_flag) { | |||
GELOGW("Will dump incorrect op data with GE Option ge.exec.disableReuseMemory=0"); | |||
} | |||
} else if (disableReuseMemoryIter->second == "1") { | |||
GELOGD("ge.exec.disableReuseMemory=1, reuse memory is close"); | |||
} else { | |||
GELOGE(PARAM_INVALID, "CheckDumpAndReuseMemory ge.exec.disableReuseMemory is valid"); | |||
return FAILED; | |||
} | |||
} else { | |||
if (dump_op_flag) { | |||
GELOGW("Will dump incorrect op data with default reuse memory"); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status CheckOptionsValid(const std::map<string, string> &options) { | |||
// check job_id is valid | |||
auto job_id_iter = options.find(OPTION_EXEC_JOB_ID); | |||
@@ -72,6 +96,11 @@ Status CheckOptionsValid(const std::map<string, string> &options) { | |||
} | |||
} | |||
// Check ge.exec.disableReuseMemory and env DUMP_OP | |||
if (CheckDumpAndReuseMemory(options) != SUCCESS) { | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -79,7 +108,7 @@ Status CheckOptionsValid(const std::map<string, string> &options) { | |||
Status GEInitialize(const std::map<string, string> &options) { | |||
GELOGT(TRACE_INIT, "GEInitialize start"); | |||
// 0.check init status | |||
if (g_ge_initialized) { | |||
if (kGeInitialized) { | |||
GELOGW("GEInitialize is called more than once"); | |||
return SUCCESS; | |||
} | |||
@@ -118,9 +147,9 @@ Status GEInitialize(const std::map<string, string> &options) { | |||
} | |||
// 7.check return status, return | |||
if (!g_ge_initialized) { | |||
if (!kGeInitialized) { | |||
// Initialize success, first time calling initialize | |||
g_ge_initialized = true; | |||
kGeInitialized = true; | |||
} | |||
GELOGT(TRACE_STOP, "GEInitialize finished"); | |||
@@ -131,12 +160,12 @@ Status GEInitialize(const std::map<string, string> &options) { | |||
Status GEFinalize() { | |||
GELOGT(TRACE_INIT, "GEFinalize start"); | |||
// check init status | |||
if (!g_ge_initialized) { | |||
if (!kGeInitialized) { | |||
GELOGW("GEFinalize is called before GEInitialize"); | |||
return SUCCESS; | |||
} | |||
std::lock_guard<std::mutex> lock(g_ge_release_mutex); | |||
std::lock_guard<std::mutex> lock(kGeReleaseMutex); | |||
// call Finalize | |||
Status ret = SUCCESS; | |||
Status middle_ret; | |||
@@ -158,10 +187,10 @@ Status GEFinalize() { | |||
ret = middle_ret; | |||
} | |||
if (g_ge_initialized && ret == SUCCESS) { | |||
if (kGeInitialized && ret == SUCCESS) { | |||
// Unified destruct rt_context | |||
RtContextUtil::GetInstance().DestroyAllRtContexts(); | |||
g_ge_initialized = false; | |||
RtContextUtil::GetInstance().DestroyrtContexts(); | |||
kGeInitialized = false; | |||
} | |||
GELOGT(TRACE_STOP, "GEFinalize finished"); | |||
@@ -173,7 +202,7 @@ Session::Session(const std::map<string, string> &options) { | |||
GELOGT(TRACE_INIT, "Session Constructor start"); | |||
// check init status | |||
sessionId_ = 0; | |||
if (!g_ge_initialized) { | |||
if (!kGeInitialized) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED); | |||
return; | |||
} | |||
@@ -203,13 +232,13 @@ Session::Session(const std::map<string, string> &options) { | |||
Session::~Session() { | |||
GELOGT(TRACE_INIT, "Session Destructor start"); | |||
// 0.check init status | |||
if (!g_ge_initialized) { | |||
if (!kGeInitialized) { | |||
GELOGW("GE is not yet initialized or is finalized."); | |||
return; | |||
} | |||
Status ret = FAILED; | |||
std::lock_guard<std::mutex> lock(g_ge_release_mutex); | |||
std::lock_guard<std::mutex> lock(kGeReleaseMutex); | |||
try { | |||
uint64_t session_id = sessionId_; | |||
// call DestroySession | |||
@@ -24,7 +24,6 @@ | |||
#include "common/debug/log.h" | |||
#include "common/ge/ge_util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "graph/ge_context.h" | |||
#include "init/gelib.h" | |||
@@ -162,10 +161,6 @@ bool DNNEngineManager::IsEngineRegistered(const std::string &name) { | |||
return false; | |||
} | |||
void DNNEngineManager::InitPerformanceStaistic() { checksupport_cost_.clear(); } | |||
const map<string, uint64_t> &DNNEngineManager::GetCheckSupportCost() const { return checksupport_cost_; } | |||
std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: op_desc is nullptr"); | |||
return ""); | |||
@@ -199,20 +194,15 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
if (kernel_info_store != kernel_map.end()) { | |||
std::string unsupported_reason; | |||
// It will be replaced by engine' checksupport | |||
uint64_t start_time = GetCurrentTimestap(); | |||
if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; | |||
op_desc->SetOpEngineName(it.engine); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(), | |||
it.engine.c_str(), op_desc->GetName().c_str()); | |||
return it.engine; | |||
} else { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; | |||
bool is_custom_op = false; | |||
if ((ge::AttrUtils::GetBool(op_desc, kCustomOpFlag, is_custom_op)) && is_custom_op) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E13001", {"kernelname", "optype", "opname"}, | |||
{kernel_name, op_desc->GetType(), op_desc->GetName()}); | |||
GELOGE(FAILED, | |||
"The custom operator registered by the user does not support the logic function delivered by this " | |||
"network. Check support failed, kernel_name is %s, op type is %s, op name is %s", | |||
@@ -231,13 +221,9 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
} | |||
} | |||
for (const auto &it : unsupported_reasons) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E13002", {"optype", "opskernel", "reason"}, | |||
{op_desc->GetType(), it.first, it.second}); | |||
GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "GetDNNEngineName:Op type %s of ops kernel %s is unsupported, reason:%s", | |||
op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str()); | |||
} | |||
ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"}, | |||
{op_desc->GetName(), op_desc->GetType()}); | |||
GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "Can't find any supported ops kernel and engine of %s, type is %s", | |||
op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||
return ""; | |||
@@ -398,13 +384,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||
return FAILED; | |||
} | |||
try { | |||
ifs >> *json_file; | |||
} catch (const json::exception &e) { | |||
GELOGE(FAILED, "Read json file failed"); | |||
ifs.close(); | |||
return FAILED; | |||
} | |||
ifs >> *json_file; | |||
ifs.close(); | |||
GELOGI("Read json file success"); | |||
return SUCCESS; | |||
@@ -63,8 +63,6 @@ class DNNEngineManager { | |||
// If can't find appropriate engine name, return "", report error | |||
string GetDNNEngineName(const OpDescPtr &op_desc); | |||
const map<string, SchedulerConf> &GetSchedulers() const; | |||
const map<string, uint64_t> &GetCheckSupportCost() const; | |||
void InitPerformanceStaistic(); | |||
private: | |||
DNNEngineManager(); | |||
@@ -80,7 +78,6 @@ class DNNEngineManager { | |||
std::map<std::string, DNNEnginePtr> engines_map_; | |||
std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_; | |||
std::map<string, SchedulerConf> schedulers_; | |||
std::map<string, uint64_t> checksupport_cost_; | |||
bool init_flag_; | |||
}; | |||
} // namespace ge | |||
@@ -26,7 +26,6 @@ file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"ge_executor.cc" | |||
"../common/ge/op_tiling_manager.cc" | |||
"../common/ge/plugin_manager.cc" | |||
"../common/profiling/profiling_manager.cc" | |||
"../graph/execute/graph_execute.cc" | |||
@@ -60,6 +59,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"../graph/load/new_model_manager/task_info/task_info.cc" | |||
"../graph/load/new_model_manager/tbe_handle_store.cc" | |||
"../graph/load/new_model_manager/zero_copy_task.cc" | |||
"../graph/load/output/output.cc" | |||
"../graph/manager/graph_caching_allocator.cc" | |||
"../graph/manager/graph_manager_utils.cc" | |||
"../graph/manager/graph_mem_allocator.cc" | |||
@@ -854,4 +854,5 @@ Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, | |||
GELOGI("GetAllAippInputOutputDims succ."); | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -4,7 +4,6 @@ local_ge_executor_src_files := \ | |||
ge_executor.cc \ | |||
../common/profiling/profiling_manager.cc \ | |||
../common/ge/plugin_manager.cc \ | |||
../common/ge/op_tiling_manager.cc \ | |||
../graph/load/graph_loader.cc \ | |||
../graph/execute/graph_execute.cc \ | |||
../omm/csa_interact.cc \ | |||
@@ -45,6 +44,7 @@ local_ge_executor_src_files := \ | |||
../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
../graph/load/output/output.cc \ | |||
../single_op/single_op_manager.cc \ | |||
../single_op/single_op_model.cc \ | |||
../single_op/single_op.cc \ | |||
@@ -53,7 +53,6 @@ local_ge_executor_src_files := \ | |||
../single_op/task/build_task_utils.cc \ | |||
../single_op/task/tbe_task_builder.cc \ | |||
../single_op/task/aicpu_task_builder.cc \ | |||
../single_op/task/aicpu_kernel_task_builder.cc \ | |||
../hybrid/hybrid_davinci_model_stub.cc\ | |||
local_ge_executor_c_include := \ | |||
@@ -32,7 +32,6 @@ COMMON_LOCAL_SRC_FILES := \ | |||
GRAPH_MANAGER_LOCAL_SRC_FILES := \ | |||
common/ge/plugin_manager.cc\ | |||
common/ge/op_tiling_manager.cc\ | |||
init/gelib.cc \ | |||
session/inner_session.cc \ | |||
session/session_manager.cc \ | |||
@@ -92,7 +91,6 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/no_use_reshape_remove_pass.cc \ | |||
graph/passes/iterator_op_pass.cc \ | |||
graph/passes/atomic_addr_clean_pass.cc \ | |||
graph/passes/mark_same_addr_pass.cc \ | |||
graph/common/omg_util.cc \ | |||
graph/common/bcast.cc \ | |||
graph/passes/dimension_compute_pass.cc \ | |||
@@ -147,7 +145,6 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/stop_gradient_pass.cc \ | |||
graph/passes/prevent_gradient_pass.cc \ | |||
graph/passes/identity_pass.cc \ | |||
graph/passes/ref_identity_delete_op_pass.cc \ | |||
graph/passes/placeholder_with_default_pass.cc \ | |||
graph/passes/snapshot_pass.cc \ | |||
graph/passes/guarantee_const_pass.cc \ | |||
@@ -156,9 +153,7 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/folding_pass.cc \ | |||
graph/passes/cast_translate_pass.cc \ | |||
graph/passes/prune_pass.cc \ | |||
graph/passes/merge_to_stream_merge_pass.cc \ | |||
graph/passes/switch_to_stream_switch_pass.cc \ | |||
graph/passes/attach_stream_label_pass.cc \ | |||
graph/passes/switch_op_pass.cc \ | |||
graph/passes/multi_batch_pass.cc \ | |||
graph/passes/next_iteration_pass.cc \ | |||
graph/passes/control_trigger_pass.cc \ | |||
@@ -178,6 +173,7 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/variable_op_pass.cc \ | |||
graph/passes/cast_remove_pass.cc \ | |||
graph/passes/transpose_transdata_pass.cc \ | |||
graph/passes/identify_reference_pass.cc \ | |||
graph/passes/hccl_memcpy_pass.cc \ | |||
graph/passes/flow_ctrl_pass.cc \ | |||
graph/passes/link_gen_mask_nodes_pass.cc \ | |||
@@ -203,6 +199,7 @@ OME_HOST_SRC_FILES := \ | |||
graph/load/new_model_manager/tbe_handle_store.cc \ | |||
graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
graph/load/new_model_manager/zero_copy_task.cc \ | |||
graph/load/output/output.cc \ | |||
graph/load/new_model_manager/data_dumper.cc \ | |||
graph/load/new_model_manager/task_info/task_info.cc \ | |||
graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
@@ -227,7 +224,6 @@ OME_HOST_SRC_FILES := \ | |||
single_op/task/build_task_utils.cc \ | |||
single_op/task/tbe_task_builder.cc \ | |||
single_op/task/aicpu_task_builder.cc \ | |||
single_op/task/aicpu_kernel_task_builder.cc \ | |||
single_op/single_op.cc \ | |||
single_op/single_op_model.cc \ | |||
single_op/stream_resource.cc \ | |||
@@ -372,7 +368,7 @@ endif | |||
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||
LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_ir_build.cc | |||
LOCAL_SRC_FILES := ../../out/atc/lib64/stub/ge_ir_build.cc | |||
LOCAL_SHARED_LIBRARIES := | |||
@@ -23,7 +23,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
common/formats/utils/formats_trans_utils.cc \ | |||
common/fp16_t.cc \ | |||
common/ge/plugin_manager.cc\ | |||
common/ge/op_tiling_manager.cc\ | |||
common/helper/model_cache_helper.cc \ | |||
common/profiling/profiling_manager.cc \ | |||
engine_manager/dnnengine_manager.cc \ | |||
@@ -78,6 +77,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
graph/load/new_model_manager/task_info/task_info.cc \ | |||
graph/load/new_model_manager/tbe_handle_store.cc \ | |||
graph/load/new_model_manager/zero_copy_task.cc \ | |||
graph/load/output/output.cc \ | |||
graph/manager/graph_context.cc \ | |||
graph/manager/graph_manager.cc \ | |||
graph/manager/graph_manager_utils.cc \ | |||
@@ -99,7 +99,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
graph/passes/aicpu_constant_folding_pass.cc \ | |||
graph/passes/assert_pass.cc \ | |||
graph/passes/atomic_addr_clean_pass.cc \ | |||
graph/passes/mark_same_addr_pass.cc \ | |||
graph/partition/dynamic_shape_partition.cc \ | |||
graph/passes/base_pass.cc \ | |||
graph/passes/cast_remove_pass.cc \ | |||
@@ -159,8 +158,8 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
graph/passes/get_original_format_pass.cc \ | |||
graph/passes/guarantee_const_pass.cc \ | |||
graph/passes/hccl_memcpy_pass.cc \ | |||
graph/passes/identify_reference_pass.cc \ | |||
graph/passes/identity_pass.cc \ | |||
graph/passes/ref_identity_delete_op_pass.cc \ | |||
graph/passes/infershape_pass.cc \ | |||
graph/passes/isolated_op_remove_pass.cc \ | |||
graph/passes/iterator_op_pass.cc \ | |||
@@ -192,9 +191,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
graph/passes/data_pass.cc \ | |||
graph/passes/switch_data_edges_bypass.cc \ | |||
graph/passes/switch_logic_remove_pass.cc \ | |||
graph/passes/merge_to_stream_merge_pass.cc \ | |||
graph/passes/switch_to_stream_switch_pass.cc \ | |||
graph/passes/attach_stream_label_pass.cc \ | |||
graph/passes/switch_op_pass.cc \ | |||
graph/passes/switch_dead_branch_elimination.cc \ | |||
graph/passes/replace_transshape_pass.cc \ | |||
graph/passes/transop_breadth_fusion_pass.cc \ | |||
@@ -233,7 +230,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
single_op/task/op_task.cc \ | |||
single_op/task/tbe_task_builder.cc \ | |||
single_op/task/aicpu_task_builder.cc \ | |||
single_op/task/aicpu_kernel_task_builder.cc \ | |||
hybrid/common/tensor_value.cc \ | |||
hybrid/common/npu_memory_allocator.cc \ | |||
hybrid/executor/rt_callback_manager.cc \ | |||
@@ -243,15 +239,12 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
hybrid/executor/hybrid_model_executor.cc \ | |||
hybrid/executor/hybrid_model_async_executor.cc \ | |||
hybrid/executor/hybrid_execution_context.cc \ | |||
hybrid/executor/subgraph_context.cc \ | |||
hybrid/executor/subgraph_executor.cc \ | |||
hybrid/executor/worker/task_compile_engine.cc \ | |||
hybrid/executor/worker/shape_inference_engine.cc \ | |||
hybrid/executor/worker/execution_engine.cc \ | |||
hybrid/model/hybrid_model.cc \ | |||
hybrid/model/hybrid_model_builder.cc \ | |||
hybrid/model/node_item.cc \ | |||
hybrid/model/graph_item.cc \ | |||
hybrid/node_executor/aicore/aicore_node_executor.cc \ | |||
hybrid/node_executor/aicore/aicore_op_task.cc \ | |||
hybrid/node_executor/aicore/aicore_task_builder.cc \ | |||
@@ -260,9 +253,6 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
hybrid/node_executor/aicpu/aicpu_node_executor.cc \ | |||
hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ | |||
hybrid/node_executor/hostcpu/ge_local_node_executor.cc \ | |||
hybrid/node_executor/controlop/control_op_executor.cc \ | |||
hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | |||
hybrid/node_executor/hccl/hccl_node_executor.cc \ | |||
hybrid/node_executor/node_executor.cc \ | |||
hybrid/node_executor/task_context.cc \ | |||
hybrid/hybrid_davinci_model.cc \ | |||
@@ -348,28 +338,6 @@ LOCAL_SHARED_LIBRARIES += \ | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
#compiler for GeRunner | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := stub/libge_runner | |||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD | |||
ifeq ($(DEBUG), 1) | |||
LOCAL_CFLAGS += -g -O0 | |||
endif | |||
LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | |||
LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc | |||
LOCAL_SHARED_LIBRARIES := | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
# add engine_conf.json to host | |||
include $(CLEAR_VARS) | |||
@@ -439,7 +407,6 @@ LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD | |||
LOCAL_CFLAGS += -g -O0 | |||
LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | |||
LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) | |||
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||
@@ -0,0 +1,333 @@ | |||
LOCAL_PATH := $(call my-dir) | |||
COMMON_LOCAL_SRC_FILES := \ | |||
proto/fusion_model.proto \ | |||
proto/optimizer_priority.proto \ | |||
session/inner_session.cc \ | |||
session/session_manager.cc \ | |||
common/ge/plugin_manager.cc\ | |||
common/fp16_t.cc \ | |||
common/formats/utils/formats_trans_utils.cc \ | |||
common/formats/format_transfers/datatype_transfer.cc \ | |||
common/formats/format_transfers/format_transfer_transpose.cc \ | |||
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ | |||
common/formats/format_transfers/format_transfer_fractal_z.cc \ | |||
common/formats/format_transfers/format_transfer_fractal_nz.cc \ | |||
common/formats/format_transfers/format_transfer_fractal_zz.cc \ | |||
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \ | |||
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \ | |||
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \ | |||
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \ | |||
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \ | |||
common/formats/format_transfers/format_transfer_fracz_nchw.cc \ | |||
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \ | |||
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \ | |||
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \ | |||
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \ | |||
common/formats/formats.cc \ | |||
init/gelib.cc \ | |||
engine_manager/dnnengine_manager.cc \ | |||
opskernel_manager/ops_kernel_manager.cc \ | |||
graph/manager/graph_manager.cc \ | |||
graph/manager/graph_manager_utils.cc \ | |||
graph/manager/graph_context.cc \ | |||
graph/preprocess/graph_preprocess.cc \ | |||
graph/preprocess/multi_batch_copy_graph.cc \ | |||
graph/execute/graph_execute.cc \ | |||
graph/load/graph_loader.cc \ | |||
graph/optimize/graph_optimize.cc \ | |||
graph/passes/folding_pass.cc \ | |||
graph/optimize/summary_optimize.cc \ | |||
graph/build/graph_builder.cc \ | |||
graph/partition/engine_place.cc \ | |||
graph/partition/graph_partition.cc \ | |||
graph/partition/dynamic_shape_partition.cc \ | |||
generator/ge_generator.cc \ | |||
generator/generator_api.cc \ | |||
common/profiling/profiling_manager.cc \ | |||
ge_local_engine/engine/host_cpu_engine.cc \ | |||
common/helper/model_cache_helper.cc \ | |||
OMG_HOST_SRC_FILES := \ | |||
model/ge_model.cc \ | |||
model/ge_root_model.cc \ | |||
graph/common/transop_util.cc \ | |||
graph/manager/graph_var_manager.cc \ | |||
graph/manager/trans_var_data_utils.cc \ | |||
omm/csa_interact.cc \ | |||
graph/passes/pass_manager.cc \ | |||
graph/passes/pass_utils.cc \ | |||
graph/passes/base_pass.cc \ | |||
graph/passes/resource_pair_add_control_pass.cc \ | |||
graph/passes/resource_pair_remove_control_pass.cc \ | |||
graph/passes/constant_folding_pass.cc \ | |||
graph/passes/aicpu_constant_folding_pass.cc \ | |||
graph/passes/reshape_remove_pass.cc \ | |||
graph/passes/reshape_recovery_pass.cc \ | |||
graph/passes/transop_breadth_fusion_pass.cc \ | |||
graph/passes/transop_depth_fusion_pass.cc \ | |||
graph/passes/same_transdata_breadth_fusion_pass.cc \ | |||
graph/passes/transop_without_reshape_fusion_pass.cc \ | |||
graph/passes/compile_nodes_pass.cc \ | |||
graph/passes/transop_nearby_allreduce_fusion_pass.cc \ | |||
graph/passes/variable_prepare_op_pass.cc \ | |||
graph/passes/variable_ref_delete_op_pass.cc \ | |||
graph/passes/variable_ref_useless_control_out_delete_pass.cc \ | |||
graph/passes/variable_op_pass.cc \ | |||
graph/passes/cast_remove_pass.cc \ | |||
graph/passes/replace_transshape_pass.cc \ | |||
graph/passes/transpose_transdata_pass.cc \ | |||
graph/passes/identify_reference_pass.cc \ | |||
graph/passes/variable_format_pass.cc \ | |||
graph/passes/subgraph_pass.cc \ | |||
graph/passes/data_pass.cc \ | |||
graph/passes/net_output_pass.cc \ | |||
graph/passes/constant_fuse_same_pass.cc \ | |||
graph/passes/print_op_pass.cc \ | |||
graph/passes/no_use_reshape_remove_pass.cc \ | |||
graph/passes/iterator_op_pass.cc \ | |||
graph/passes/atomic_addr_clean_pass.cc \ | |||
graph/optimize/optimizer/allreduce_fusion_pass.cc \ | |||
graph/common/omg_util.cc \ | |||
graph/common/bcast.cc \ | |||
graph/passes/dimension_compute_pass.cc \ | |||
graph/passes/dimension_adjust_pass.cc \ | |||
graph/passes/get_original_format_pass.cc \ | |||
graph/passes/shape_operate_op_remove_pass.cc \ | |||
graph/passes/unused_op_remove_pass.cc \ | |||
graph/passes/assert_pass.cc \ | |||
graph/passes/dropout_pass.cc \ | |||
graph/passes/infershape_pass.cc \ | |||
graph/passes/unused_const_pass.cc \ | |||
graph/passes/isolated_op_remove_pass.cc \ | |||
graph/passes/permute_pass.cc \ | |||
graph/passes/ctrl_edge_transfer_pass.cc \ | |||
host_kernels/broadcast_gradient_args_kernel.cc \ | |||
host_kernels/greater_kernel.cc \ | |||
host_kernels/gather_v2_kernel.cc \ | |||
host_kernels/maximum_kernel.cc \ | |||
host_kernels/floormod_kernel.cc \ | |||
host_kernels/floordiv_kernel.cc \ | |||
host_kernels/range_kernel.cc \ | |||
host_kernels/shape_kernel.cc \ | |||
host_kernels/size_kernel.cc \ | |||
host_kernels/shape_n_kernel.cc \ | |||
host_kernels/rank_kernel.cc \ | |||
host_kernels/broadcast_args_kernel.cc \ | |||
host_kernels/fill_kernel.cc \ | |||
host_kernels/empty_kernel.cc \ | |||
host_kernels/expanddims_kernel.cc \ | |||
host_kernels/reshape_kernel.cc \ | |||
host_kernels/squeeze_kernel.cc \ | |||
host_kernels/kernel_utils.cc \ | |||
host_kernels/cast_kernel.cc \ | |||
host_kernels/transdata_kernel.cc \ | |||
host_kernels/transpose_kernel.cc \ | |||
host_kernels/permute_kernel.cc \ | |||
host_kernels/pack_kernel.cc \ | |||
host_kernels/concat_v2_kernel.cc \ | |||
host_kernels/concat_offset_kernel.cc \ | |||
host_kernels/strided_slice_kernel.cc \ | |||
host_kernels/ssd_prior_box_kernel.cc \ | |||
host_kernels/add_kernel.cc \ | |||
host_kernels/unpack_kernel.cc \ | |||
host_kernels/sub_kernel.cc \ | |||
host_kernels/mul_kernel.cc \ | |||
host_kernels/reduce_prod_kernel.cc \ | |||
host_kernels/rsqrt_kernel.cc \ | |||
host_kernels/slice_kernel.cc \ | |||
host_kernels/slice_d_kernel.cc \ | |||
host_kernels/dynamic_stitch_kernel.cc \ | |||
graph/passes/stop_gradient_pass.cc \ | |||
graph/passes/prevent_gradient_pass.cc \ | |||
graph/passes/identity_pass.cc \ | |||
graph/passes/placeholder_with_default_pass.cc \ | |||
graph/passes/snapshot_pass.cc \ | |||
graph/passes/guarantee_const_pass.cc \ | |||
graph/passes/var_is_initialized_op_pass.cc \ | |||
graph/passes/parallel_concat_start_op_pass.cc \ | |||
graph/passes/cast_translate_pass.cc \ | |||
graph/passes/addn_pass.cc \ | |||
graph/passes/common_subexpression_elimination_pass.cc \ | |||
graph/passes/transop_symmetry_elimination_pass.cc \ | |||
graph/passes/save_pass.cc \ | |||
graph/passes/switch_dead_branch_elimination.cc \ | |||
graph/passes/merge_pass.cc \ | |||
graph/passes/prune_pass.cc \ | |||
graph/passes/flow_ctrl_pass.cc \ | |||
graph/passes/control_trigger_pass.cc \ | |||
graph/passes/switch_data_edges_bypass.cc \ | |||
graph/passes/switch_op_pass.cc \ | |||
graph/passes/multi_batch_pass.cc \ | |||
graph/passes/switch_logic_remove_pass.cc \ | |||
graph/passes/next_iteration_pass.cc \ | |||
graph/passes/cond_pass.cc \ | |||
graph/passes/cond_remove_pass.cc \ | |||
graph/passes/for_pass.cc \ | |||
graph/passes/enter_pass.cc \ | |||
graph/passes/hccl_memcpy_pass.cc \ | |||
graph/passes/link_gen_mask_nodes_pass.cc \ | |||
graph/passes/replace_with_empty_const_pass.cc \ | |||
graph/passes/hccl_group_pass.cc \ | |||
OME_SRC_FILES := \ | |||
graph/manager/graph_mem_allocator.cc \ | |||
graph/manager/graph_caching_allocator.cc \ | |||
graph/manager/model_manager/event_manager.cc \ | |||
graph/manager/util/debug.cc \ | |||
graph/manager/util/rt_context_util.cc \ | |||
graph/manager/util/variable_accelerate_ctrl.cc \ | |||
graph/manager/util/hcom_util.cc \ | |||
graph/load/new_model_manager/model_manager.cc \ | |||
graph/load/new_model_manager/data_inputer.cc \ | |||
graph/load/new_model_manager/davinci_model.cc \ | |||
graph/load/new_model_manager/davinci_model_parser.cc \ | |||
graph/load/new_model_manager/model_utils.cc \ | |||
graph/load/new_model_manager/tbe_handle_store.cc \ | |||
graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
graph/load/new_model_manager/zero_copy_task.cc \ | |||
graph/load/output/output.cc \ | |||
graph/load/new_model_manager/data_dumper.cc \ | |||
graph/load/new_model_manager/task_info/task_info.cc \ | |||
graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||
graph/load/new_model_manager/task_info/hccl_task_info.cc \ | |||
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||
graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||
graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||
graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||
graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||
single_op/task/op_task.cc \ | |||
single_op/task/build_task_utils.cc \ | |||
single_op/task/tbe_task_builder.cc \ | |||
single_op/task/aicpu_task_builder.cc \ | |||
single_op/single_op.cc \ | |||
single_op/single_op_model.cc \ | |||
single_op/stream_resource.cc \ | |||
single_op/single_op_manager.cc \ | |||
hybrid/hybrid_davinci_model_stub.cc \ | |||
COMMON_LOCAL_C_INCLUDES := \ | |||
proto/om.proto \ | |||
proto/task.proto \ | |||
proto/insert_op.proto \ | |||
proto/ge_ir.proto \ | |||
proto/fwk_adapter.proto \ | |||
proto/op_mapping_info.proto \ | |||
proto/tensorflow/attr_value.proto \ | |||
proto/tensorflow/function.proto \ | |||
proto/tensorflow/graph.proto \ | |||
proto/tensorflow/node_def.proto \ | |||
proto/tensorflow/op_def.proto \ | |||
proto/tensorflow/resource_handle.proto \ | |||
proto/tensorflow/tensor.proto \ | |||
proto/tensorflow/tensor_shape.proto \ | |||
proto/tensorflow/types.proto \ | |||
proto/tensorflow/versions.proto \ | |||
$(LOCAL_PATH) ./ \ | |||
$(TOPDIR)inc \ | |||
$(TOPDIR)inc/external \ | |||
$(TOPDIR)inc/external/graph \ | |||
$(TOPDIR)inc/framework \ | |||
$(TOPDIR)inc/framework/common \ | |||
$(TOPDIR)inc/runtime \ | |||
$(TOPDIR)libc_sec/include \ | |||
$(TOPDIR)ops/built-in/op_proto/inc \ | |||
third_party/json/include \ | |||
third_party/protobuf/include \ | |||
third_party/opencv/include \ | |||
NEW_OMG_HOST_SRC_FILES := \ | |||
graph/preprocess/insert_op/util_insert_aipp_op.cc \ | |||
graph/preprocess/insert_op/ge_aipp_op.cc \ | |||
graph/build/model_builder.cc \ | |||
graph/build/task_generator.cc \ | |||
graph/build/stream_allocator.cc \ | |||
graph/build/logical_stream_allocator.cc \ | |||
graph/build/stream_graph_optimizer.cc \ | |||
graph/build/run_context.cc \ | |||
graph/build/label_allocator.cc \ | |||
graph/label/label_maker.cc \ | |||
graph/label/if_label_maker.cc \ | |||
graph/label/case_label_maker.cc \ | |||
graph/label/while_label_maker.cc \ | |||
graph/label/partitioned_call_label_maker.cc \ | |||
#compiler for host train | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := libge_train | |||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||
LOCAL_CFLAGS += -DDAVINCI_CLOUD -DDAVINCI_TRAIN -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING | |||
LOCAL_CFLAGS += -DFMK_SUPPORT_DEBUG | |||
ifeq ($(DEBUG), 1) | |||
LOCAL_CFLAGS += -g -O0 | |||
endif | |||
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) | |||
LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES) | |||
LOCAL_SRC_FILES += $(OME_SRC_FILES) | |||
LOCAL_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES) | |||
LOCAL_STATIC_LIBRARIES := libge_memory \ | |||
LOCAL_SHARED_LIBRARIES := \ | |||
libc_sec \ | |||
libprotobuf \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
libge_common \ | |||
libhccl \ | |||
libmsprof \ | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
LOCAL_SHARED_LIBRARIES += \ | |||
libruntime \ | |||
libresource \ | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
# add engine_conf.json to host | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := engine_conf.json | |||
LOCAL_SRC_FILES := engine_manager/engine_conf.json | |||
LOCAL_MODULE_CLASS := ETC | |||
LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json | |||
include $(BUILD_HOST_PREBUILT) | |||
# add optimizer_priority.pbtxt to host | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := optimizer_priority.pbtxt | |||
LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt | |||
LOCAL_MODULE_CLASS := ETC | |||
LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt | |||
include $(BUILD_HOST_PREBUILT) |
@@ -207,13 +207,6 @@ class GeGenerator::Impl { | |||
GraphManager graph_manager_; | |||
SaveParam save_param_; | |||
bool is_offline_ = true; | |||
private: | |||
static std::string Trim(const std::string &str); | |||
bool ParseVersion(const std::string &line, std::string &version); | |||
bool GetVersionFromPath(const std::string &file_path, std::string &version); | |||
bool SetAtcVersionInfo(AttrHolder &obj); | |||
bool SetOppVersionInfo(AttrHolder &obj); | |||
}; | |||
Status GeGenerator::Initialize(const map<string, string> &options) { | |||
@@ -295,124 +288,6 @@ Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) { | |||
return SUCCESS; | |||
} | |||
// Remove the space and tab before and after the string | |||
std::string GeGenerator::Impl::Trim(const std::string &str) { | |||
if (str.empty()) { | |||
return str; | |||
} | |||
std::string::size_type start = str.find_first_not_of(" \t\r\n"); | |||
if (start == std::string::npos) { | |||
return str; | |||
} | |||
std::string::size_type end = str.find_last_not_of(" \t\r\n") + 1; | |||
return str.substr(start, end); | |||
} | |||
// Parsing the command line | |||
bool GeGenerator::Impl::ParseVersion(const std::string &line, std::string &version) { | |||
std::string flag = "Version="; | |||
std::string temp = Trim(line); | |||
if (temp.empty()) { | |||
GELOGW("line is empty."); | |||
return false; | |||
} | |||
std::string::size_type pos = temp.find(flag); | |||
if (pos == std::string::npos) { | |||
GELOGW("Incorrect line [%s], it must include [%s].", line.c_str(), flag.c_str()); | |||
return false; | |||
} | |||
if (temp.size() == flag.size()) { | |||
GELOGW("version information is empty. %s", line.c_str()); | |||
return false; | |||
} | |||
version = temp.substr(pos + flag.size()); | |||
GELOGI("Version=%s", version.c_str()); | |||
return true; | |||
} | |||
bool GeGenerator::Impl::GetVersionFromPath(const std::string &file_path, std::string &version) { | |||
// Normalize the path | |||
string resolved_file_path = RealPath(file_path.c_str()); | |||
if (resolved_file_path.empty()) { | |||
GELOGW("Invalid input file path [%s], make sure that the file path is correct.", file_path.c_str()); | |||
return false; | |||
} | |||
std::ifstream fs(resolved_file_path, std::ifstream::in); | |||
if (!fs.is_open()) { | |||
GELOGW("Open %s failed.", file_path.c_str()); | |||
return false; | |||
} | |||
std::string line; | |||
if (getline(fs, line)) { | |||
if (!ParseVersion(line, version)) { | |||
GELOGW("Parse version failed. content is [%s].", line.c_str()); | |||
fs.close(); | |||
return false; | |||
} | |||
} else { | |||
GELOGW("No version information found in the file path:%s", file_path.c_str()); | |||
fs.close(); | |||
return false; | |||
} | |||
fs.close(); // close the file | |||
return true; | |||
} | |||
// Set package version information in the model | |||
bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) { | |||
std::string path_base = ge::GELib::GetPath(); | |||
path_base = path_base.substr(0, path_base.rfind('/')); | |||
path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||
std::string version_path = path_base + "version.info"; | |||
GELOGI("version_path is %s", version_path.c_str()); | |||
std::string version; | |||
if (!GetVersionFromPath(version_path, version)) { | |||
GELOGW("Get atc version information failed!"); | |||
return false; | |||
} | |||
// set version info | |||
if (!ge::AttrUtils::SetStr(obj, ATTR_MODEL_ATC_VERSION, version)) { | |||
GELOGW("Ge model set atc version failed!"); | |||
return false; | |||
} | |||
GELOGI("Ge model set atc version information success."); | |||
return true; | |||
} | |||
// Set package version information in the model | |||
bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) { | |||
const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||
if (path_env == nullptr) { | |||
GELOGW("Get environment variable ASCEND_OPP_PATH failed!"); | |||
return false; | |||
} | |||
std::string version_path = path_env; | |||
version_path += "/version.info"; | |||
GELOGI("version_path is %s", version_path.c_str()); | |||
std::string version; | |||
if (!GetVersionFromPath(version_path, version)) { | |||
GELOGW("Get opp version information failed!"); | |||
return false; | |||
} | |||
// set version info | |||
if (!ge::AttrUtils::SetStr(obj, ATTR_MODEL_OPP_VERSION, version)) { | |||
GELOGW("Ge model set opp version failed!"); | |||
return false; | |||
} | |||
GELOGI("Ge Model set opp version information success."); | |||
return true; | |||
} | |||
Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, | |||
ModelBufferData &model, bool is_offline) { | |||
rtContext_t ctx = nullptr; | |||
@@ -440,7 +315,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
string model_name = ""; | |||
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name); | |||
if (name_ret != SUCCESS) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||
GELOGE(FAILED, "Get model_name failed. Param --output is invalid"); | |||
return PARAM_INVALID; | |||
} | |||
@@ -590,14 +464,6 @@ Status GeGenerator::Impl::SaveParams(GeModelPtr &ge_model, const string &type, c | |||
} | |||
Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr &model, ModelBufferData &model_buff) { | |||
// set atc version | |||
if (!SetAtcVersionInfo(*(model.get()))) { | |||
GELOGW("SetPackageVersionInfo of atc failed!"); | |||
} | |||
// set opp version | |||
if (!SetOppVersionInfo(*(model.get()))) { | |||
GELOGW("SetPackageVersionInfo of ops failed!"); | |||
} | |||
ModelHelper model_helper; | |||
model_helper.SetSaveMode(is_offline_); | |||
Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff); | |||
@@ -660,4 +526,5 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -18,41 +18,6 @@ | |||
#define GE_GE_CALL_WRAPPER_H_ | |||
#include "framework/common/debug/ge_log.h" | |||
#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap() | |||
#define GE_TIMESTAMP_END(stage, stage_name) \ | |||
do { \ | |||
uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \ | |||
GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ | |||
(endUsec_##stage - startUsec_##stage)); \ | |||
} while (0); | |||
#define GE_TIMESTAMP_EVENT_END(stage, stage_name) \ | |||
do { \ | |||
uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \ | |||
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ | |||
(endUsec_##stage - startUsec_##stage)); \ | |||
} while (0); | |||
#define GE_TIMESTAMP_CALLNUM_START(stage) \ | |||
uint64_t startUsec_##stage = ge::GetCurrentTimestap(); \ | |||
uint64_t call_num_of##stage = 0; \ | |||
uint64_t time_of##stage = 0 | |||
#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestap()) | |||
#define GE_TIMESTAMP_ADD(stage) \ | |||
time_of##stage += ge::GetCurrentTimestap() - startUsec_##stage; \ | |||
call_num_of##stage++ | |||
#define GE_TIMESTAMP_CALLNUM_END(stage, stage_name) \ | |||
GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \ | |||
call_num_of##stage) | |||
#define GE_TIMESTAMP_CALLNUM_EVENT_END(stage, stage_name) \ | |||
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \ | |||
call_num_of##stage) | |||
#define RUN_WITH_TIMESTAMP_NAME(var_name, prefix, func, ...) \ | |||
do { \ | |||
GE_TIMESTAMP_START(var_name); \ | |||
@@ -64,23 +29,10 @@ | |||
} \ | |||
} while (0) | |||
#define RUN_WITH_PERF_TIMESTAMP_NAME(var_name, prefix, func, ...) \ | |||
do { \ | |||
GE_TIMESTAMP_START(var_name); \ | |||
auto ret_inner_macro = func(__VA_ARGS__); \ | |||
GE_TIMESTAMP_EVENT_END(var_name, #prefix "::" #func) \ | |||
if (ret_inner_macro != ge::SUCCESS) { \ | |||
GELOGE(ret_inner_macro, "Failed to process " #prefix "_" #func); \ | |||
return ret_inner_macro; \ | |||
} \ | |||
} while (0) | |||
#define JOIN_NAME_INNER(a, b) a##b | |||
#define JOIN_NAME(a, b) JOIN_NAME_INNER(a, b) | |||
#define COUNTER_NAME(a) JOIN_NAME(a, __COUNTER__) | |||
#define GE_RUN(prefix, func, ...) \ | |||
RUN_WITH_TIMESTAMP_NAME(COUNTER_NAME(ge_timestamp_##prefix), prefix, func, __VA_ARGS__) | |||
#define GE_RUN_PERF(prefix, func, ...) \ | |||
RUN_WITH_PERF_TIMESTAMP_NAME(COUNTER_NAME(ge_timestamp_##prefix), prefix, func, __VA_ARGS__) | |||
#endif // GE_GE_CALL_WRAPPER_H_ |
@@ -120,7 +120,7 @@ Status GraphExecutor::FreeInOutBuffer() { | |||
} | |||
} | |||
Status GraphExecutor::MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr) { | |||
Status GraphExecutor::MallocInOutBuffer(const std::vector<uint32_t> &buffer_size, std::vector<void *> &data_addr) { | |||
if (malloc_flag_) { | |||
auto all_size_same = true; | |||
if (buffer_size.size() == buffer_size_.size()) { | |||
@@ -169,7 +169,7 @@ Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor | |||
graph_input_data.timestamp = 0; | |||
std::size_t inputSize = input_tensor.size(); | |||
std::size_t output_size = output_desc.size(); | |||
std::vector<uint64_t> bufferSizeVec; | |||
std::vector<uint32_t> bufferSizeVec; | |||
std::vector<void *> addrVec; | |||
for (std::size_t i = 0; i < inputSize; ++i) { | |||
@@ -211,7 +211,7 @@ Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor | |||
for (std::size_t j = 0; j < output_size; j++) { | |||
auto desc = output_desc[j]; | |||
uint64_t buffer_size = desc.size; | |||
uint32_t buffer_size = desc.size; | |||
DataBuffer out_data_buf; | |||
out_data_buf.data = reinterpret_cast<uint8_t *>(addrVec[inputSize + j]); | |||
@@ -225,13 +225,6 @@ Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor | |||
Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor, | |||
std::vector<GeTensor> &output_tensor) { | |||
auto model_manager = ge::ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
if (model_manager->IsDynamicShape(model_id)) { | |||
GELOGI("[ExecuteGraph] GetInputOutputDescInfo via dynamic shape model executor, modelId=%u", model_id); | |||
return model_manager->SyncExecuteModel(model_id, input_tensor, output_tensor); | |||
} | |||
// Prepare input and output | |||
std::vector<InputOutputDescInfo> inputs_desc; | |||
std::vector<InputOutputDescInfo> output_desc; | |||
@@ -582,4 +575,5 @@ Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t inde | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -110,7 +110,7 @@ class GraphExecutor { | |||
Status FreeInOutBuffer(); | |||
Status MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr); | |||
Status MallocInOutBuffer(const std::vector<uint32_t> &buffer_size, std::vector<void *> &data_addr); | |||
bool init_flag_; | |||
@@ -129,7 +129,7 @@ class GraphExecutor { | |||
bool malloc_flag_; | |||
std::vector<void *> buffer_addr_; | |||
std::vector<uint64_t> buffer_size_; | |||
std::vector<uint32_t> buffer_size_; | |||
}; | |||
} // namespace ge | |||
@@ -350,8 +350,7 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { | |||
return RT_FAILED; | |||
} | |||
// Add small page memory size | |||
free = | |||
static_cast<int64_t>(free_mem + VarManager::Instance(GetContext().SessionId())->GetUseMaxMemorySize() - total_mem); | |||
free = static_cast<int64_t>(free_mem + VarManager::Instance(0)->GetUseMaxMemorySize() - total_mem); | |||
GELOGI("GetMemoryInfo free[%zu], total[%zu], return free[%ld]", free_mem, total_mem, free); | |||
return SUCCESS; | |||
} | |||
@@ -339,7 +339,7 @@ Status CpuTaskActiveEntry::Distribute() { | |||
return RT_FAILED; | |||
} | |||
GELOGI("Cpu kernel launch active entry task success."); | |||
GELOGI("Cpu kernel launch wait end task success."); | |||
return SUCCESS; | |||
} | |||
@@ -21,6 +21,7 @@ | |||
#include <utility> | |||
#include <vector> | |||
#include "common/debug/log.h" | |||
#include "common/properties_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/util.h" | |||
@@ -36,36 +37,9 @@ | |||
namespace { | |||
const uint32_t kAicpuLoadFlag = 1; | |||
const uint32_t kAicpuUnloadFlag = 0; | |||
const int64_t kOpDebugSize = 2048; | |||
const int64_t kOpDebugShape = 2048; | |||
const int8_t kDecimal = 10; | |||
const uint32_t kAddrLen = sizeof(void *); | |||
const char *const kDumpOutput = "output"; | |||
const char *const kDumpInput = "input"; | |||
const char *const kDumpAll = "all"; | |||
// parse for format like nodename:input:index | |||
static bool ParseNameIndex(const std::string &node_name_index, std::string &node_name, std::string &input_or_output, | |||
size_t &index) { | |||
auto sep = node_name_index.rfind(':'); | |||
if (sep == std::string::npos) { | |||
return false; | |||
} | |||
auto index_str = node_name_index.substr(sep + 1); | |||
index = static_cast<size_t>(std::strtol(index_str.c_str(), nullptr, kDecimal)); | |||
auto node_name_without_index = node_name_index.substr(0, sep); | |||
sep = node_name_without_index.rfind(':'); | |||
if (sep == std::string::npos) { | |||
return false; | |||
} | |||
node_name = node_name_without_index.substr(0, sep); | |||
input_or_output = node_name_without_index.substr(sep + 1); | |||
return !(input_or_output != kDumpInput && input_or_output != kDumpOutput); | |||
} | |||
static bool IsTensorDescWithSkipDumpAddrType(bool has_mem_type_attr, vector<int64_t> v_memory_type, size_t i) { | |||
return has_mem_type_attr && (v_memory_type[i] == RT_MEMORY_L1); | |||
} | |||
} // namespace | |||
static int32_t GetIrDataType(ge::DataType data_type) { | |||
@@ -164,13 +138,6 @@ void DataDumper::SaveEndGraphId(uint32_t task_id, uint32_t stream_id) { | |||
end_graph_stream_id_ = stream_id; | |||
} | |||
void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, bool is_op_debug) { | |||
op_debug_task_id_ = task_id; | |||
op_debug_stream_id_ = stream_id; | |||
op_debug_addr_ = op_debug_addr; | |||
is_op_debug_ = is_op_debug; | |||
} | |||
void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, | |||
uintptr_t args) { | |||
if (op_desc == nullptr) { | |||
@@ -235,121 +202,56 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin | |||
} | |||
} | |||
Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, size_t index) { | |||
output.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType()))); | |||
output.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat())); | |||
Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | |||
GELOGI("Start dump output"); | |||
if (inner_dump_info.is_task) { | |||
// tbe or aicpu op | |||
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc(); | |||
const auto input_size = inner_dump_info.op->GetAllInputsDesc().size(); | |||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op, false); | |||
if (output_descs.size() != output_addrs.size()) { | |||
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(), | |||
inner_dump_info.op->GetName().c_str(), output_descs.size()); | |||
return PARAM_INVALID; | |||
} | |||
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | |||
output.mutable_shape()->add_dim(dim); | |||
} | |||
int64_t output_size = 0; | |||
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Get output size filed"); | |||
return PARAM_INVALID; | |||
} | |||
GELOGD("Get output size in dump is %ld", output_size); | |||
std::string origin_name; | |||
int32_t origin_output_index = -1; | |||
(void)AttrUtils::GetStr(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | |||
(void)AttrUtils::GetInt(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | |||
output.set_size(output_size); | |||
output.set_original_name(origin_name); | |||
output.set_original_output_index(origin_output_index); | |||
output.set_original_output_format(static_cast<int32_t>(tensor_descs.at(index).GetOriginFormat())); | |||
output.set_original_output_data_type(static_cast<int32_t>(tensor_descs.at(index).GetOriginDataType())); | |||
output.set_address(static_cast<uint64_t>(addr)); | |||
return SUCCESS; | |||
} | |||
for (size_t i = 0; i < output_descs.size(); ++i) { | |||
aicpu::dump::Output output; | |||
output.set_data_type(static_cast<int32_t>(GetIrDataType(output_descs.at(i).GetDataType()))); | |||
output.set_format(static_cast<int32_t>(output_descs.at(i).GetFormat())); | |||
Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output, | |||
size_t i, const std::string &node_name_index) { | |||
std::string dump_op_name; | |||
std::string input_or_output; | |||
size_t index; | |||
// parser and find which node's input or output tensor desc is chosen for dump info | |||
if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) { | |||
GELOGE(PARAM_INVALID, "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].", | |||
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str()); | |||
return PARAM_INVALID; | |||
} | |||
GE_CHECK_NOTNULL(compute_graph_); | |||
auto replace_node = compute_graph_->FindNode(dump_op_name); | |||
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr, | |||
"Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s]," | |||
" cannot find redirect node[%s].", | |||
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(), | |||
dump_op_name.c_str()); | |||
auto replace_opdesc = replace_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(replace_opdesc); | |||
auto iter = ref_info_.find(replace_opdesc); | |||
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(), | |||
"Op [%s] output desc[%zu] cannot find any saved redirect node[%s]'s info.", | |||
inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str()); | |||
GE_CHECK_NOTNULL(iter->second); | |||
auto addr = reinterpret_cast<uintptr_t>(iter->second); | |||
if (input_or_output == kDumpInput) { | |||
const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc(); | |||
addr += kAddrLen * index; | |||
GE_CHK_STATUS_RET(GenerateOutput(output, replace_input_descs, addr, index), "Generate output failed"); | |||
} else if (input_or_output == kDumpOutput) { | |||
const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc(); | |||
const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size(); | |||
addr += (index + replace_input_size) * kAddrLen; | |||
GE_CHK_STATUS_RET(GenerateOutput(output, replace_output_descs, addr, index), "Generate output failed"); | |||
} | |||
GELOGD("Op [%s] output desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]", | |||
inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index); | |||
return SUCCESS; | |||
} | |||
for (auto dim : output_descs.at(i).GetShape().GetDims()) { | |||
output.mutable_shape()->add_dim(dim); | |||
} | |||
Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | |||
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc(); | |||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op); | |||
if (output_descs.size() != output_addrs.size()) { | |||
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(), | |||
inner_dump_info.op->GetName().c_str(), output_descs.size()); | |||
return PARAM_INVALID; | |||
} | |||
std::vector<int64_t> v_memory_type; | |||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type); | |||
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != output_descs.size()), | |||
"DumpOutputWithTask[%s], output size[%zu], output memory type size[%zu]", | |||
inner_dump_info.op->GetName().c_str(), output_descs.size(), | |||
v_memory_type.size()); | |||
for (size_t i = 0; i < output_descs.size(); ++i) { | |||
aicpu::dump::Output output; | |||
std::string node_name_index; | |||
const auto &output_desc = output_descs.at(i); | |||
// check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF | |||
if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) { | |||
GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed"); | |||
} else { | |||
GE_IF_BOOL_EXEC( | |||
IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i), | |||
GELOGD("DumpOutputWithTask[%s] output[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i); | |||
continue;); | |||
const auto input_size = inner_dump_info.op->GetInputsSize(); | |||
auto addr = inner_dump_info.args + (i + input_size) * kAddrLen; | |||
GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed"); | |||
int64_t output_size = 0; | |||
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Get output size filed"); | |||
return PARAM_INVALID; | |||
} | |||
GELOGI("Get output size in dump is %ld", output_size); | |||
std::string origin_name; | |||
int32_t origin_output_index = -1; | |||
(void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | |||
(void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | |||
GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size); | |||
return PARAM_INVALID) | |||
output.set_size(output_size); | |||
output.set_original_name(origin_name); | |||
output.set_original_output_index(origin_output_index); | |||
output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat())); | |||
output.set_original_output_data_type(static_cast<int32_t>(output_descs.at(i).GetOriginDataType())); | |||
output.set_address(static_cast<uint64_t>(inner_dump_info.args + (i + input_size) * sizeof(void *))); | |||
task.mutable_output()->Add(std::move(output)); | |||
} | |||
task.mutable_output()->Add(std::move(output)); | |||
return SUCCESS; | |||
} | |||
return SUCCESS; | |||
} | |||
Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | |||
GELOGI("Start dump output"); | |||
if (inner_dump_info.is_task) { | |||
// tbe or aicpu op, these ops are with task | |||
return DumpOutputWithTask(inner_dump_info, task); | |||
} | |||
// else data, const or variable op | |||
aicpu::dump::Output output; | |||
auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index); | |||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op); | |||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op, false); | |||
if (output_tensor == nullptr) { | |||
GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index, | |||
inner_dump_info.op->GetOutputsSize()); | |||
@@ -367,6 +269,9 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||
int32_t origin_output_index = -1; | |||
(void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | |||
(void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | |||
GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0, | |||
GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size); | |||
return PARAM_INVALID) | |||
output.set_size(inner_dump_info.data_size); | |||
output.set_original_name(origin_name); | |||
output.set_original_output_index(origin_output_index); | |||
@@ -377,7 +282,7 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||
GELOGE(FAILED, "Index is out of range."); | |||
return FAILED; | |||
} | |||
auto data_addr = inner_dump_info.args + kAddrLen * static_cast<uint32_t>(inner_dump_info.input_anchor_index); | |||
auto data_addr = inner_dump_info.args + sizeof(void *) * static_cast<uint32_t>(inner_dump_info.input_anchor_index); | |||
output.set_address(static_cast<uint64_t>(data_addr)); | |||
task.mutable_output()->Add(std::move(output)); | |||
@@ -385,98 +290,37 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||
return SUCCESS; | |||
} | |||
Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, size_t index) { | |||
input.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType()))); | |||
input.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat())); | |||
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | |||
input.mutable_shape()->add_dim(dim); | |||
} | |||
int64_t input_size = 0; | |||
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | |||
GELOGI("Get aipp input size according to attr is %ld", input_size); | |||
} else if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), input_size) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Get input size filed"); | |||
return PARAM_INVALID; | |||
} | |||
GELOGD("Get input size in dump is %ld", input_size); | |||
input.set_size(input_size); | |||
input.set_address(static_cast<uint64_t>(addr)); | |||
return SUCCESS; | |||
} | |||
Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, | |||
const std::string &node_name_index) { | |||
std::string dump_op_name; | |||
std::string input_or_output; | |||
size_t index; | |||
// parser and find which node's input or output tensor desc is chosen for dump info | |||
if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) { | |||
GELOGE(PARAM_INVALID, "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].", | |||
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str()); | |||
return PARAM_INVALID; | |||
} | |||
GE_CHECK_NOTNULL(compute_graph_); | |||
auto replace_node = compute_graph_->FindNode(dump_op_name); | |||
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr, | |||
"Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s]," | |||
" cannot find redirect node[%s].", | |||
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(), | |||
dump_op_name.c_str()); | |||
auto replace_opdesc = replace_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(replace_opdesc); | |||
auto iter = ref_info_.find(replace_opdesc); | |||
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(), | |||
"Op [%s] input desc[%zu] cannot find any saved redirect node[%s]'s info.", | |||
inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str()); | |||
GE_CHECK_NOTNULL(iter->second); | |||
auto addr = reinterpret_cast<uintptr_t>(iter->second); | |||
if (input_or_output == kDumpInput) { | |||
const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc(); | |||
addr += kAddrLen * index; | |||
GE_CHK_STATUS_RET(GenerateInput(input, replace_input_descs, addr, index), "Generate input failed"); | |||
} else if (input_or_output == kDumpOutput) { | |||
const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc(); | |||
const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size(); | |||
addr += (index + replace_input_size) * kAddrLen; | |||
GE_CHK_STATUS_RET(GenerateInput(input, replace_output_descs, addr, index), "Generate input failed"); | |||
} | |||
GELOGD("Op [%s] input desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]", | |||
inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index); | |||
return SUCCESS; | |||
} | |||
Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | |||
GELOGI("Start dump input"); | |||
const auto &input_descs = inner_dump_info.op->GetAllInputsDesc(); | |||
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op); | |||
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op, false); | |||
if (input_descs.size() != input_addrs.size()) { | |||
GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(), | |||
inner_dump_info.op->GetName().c_str(), input_descs.size()); | |||
return PARAM_INVALID; | |||
} | |||
std::vector<int64_t> v_memory_type; | |||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type); | |||
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != input_descs.size()), | |||
"DumpInput[%s], input size[%zu], input memory type size[%zu]", | |||
inner_dump_info.op->GetName().c_str(), input_descs.size(), v_memory_type.size()); | |||
for (size_t i = 0; i < input_descs.size(); ++i) { | |||
aicpu::dump::Input input; | |||
std::string node_name_index; | |||
// check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF | |||
if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) { | |||
GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed"); | |||
// normal dump without attr | |||
} else { | |||
GE_IF_BOOL_EXEC(IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i), | |||
GELOGD("DumpInput[%s] input[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i); | |||
continue;); | |||
auto addr = inner_dump_info.args + kAddrLen * i; | |||
GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed"); | |||
input.set_data_type(static_cast<int32_t>(GetIrDataType(input_descs.at(i).GetDataType()))); | |||
input.set_format(static_cast<int32_t>(input_descs.at(i).GetFormat())); | |||
for (auto dim : input_descs.at(i).GetShape().GetDims()) { | |||
input.mutable_shape()->add_dim(dim); | |||
} | |||
int64_t input_size = 0; | |||
if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | |||
GELOGI("Get aipp input size according to attr is %ld", input_size); | |||
} else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Get input size filed"); | |||
return PARAM_INVALID; | |||
} | |||
GELOGI("Get input size in dump is %ld", input_size); | |||
GE_IF_BOOL_EXEC(input_size <= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size); | |||
return PARAM_INVALID;) | |||
input.set_size(input_size); | |||
input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i)); | |||
task.mutable_input()->Add(std::move(input)); | |||
} | |||
return SUCCESS; | |||
@@ -556,38 +400,36 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ | |||
GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); | |||
return SUCCESS; | |||
} | |||
Status DataDumper::LoadDumpInfo() { | |||
std::string dump_list_key; | |||
PrintCheckLog(dump_list_key); | |||
if (op_list_.empty()) { | |||
GELOGW("op_list_ is empty"); | |||
return SUCCESS; | |||
} | |||
aicpu::dump::OpMappingInfo op_mapping_info; | |||
auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id_) + "/"; | |||
op_mapping_info.set_dump_path(dump_path); | |||
auto dump_path = PropertiesManager::Instance().GetDumpOutputPath(); | |||
op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/"); | |||
op_mapping_info.set_model_name(dump_list_key); | |||
op_mapping_info.set_model_id(model_id_); | |||
op_mapping_info.set_flag(kAicpuLoadFlag); | |||
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | |||
op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep()); | |||
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||
GELOGI("Dump step is %s and dump path is %s in load dump info", dump_properties_.GetDumpStep().c_str(), | |||
GELOGI("Dump step is %s and dump path is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(), | |||
dump_path.c_str()); | |||
for (const auto &op_iter : op_list_) { | |||
auto op_desc = op_iter.op; | |||
GELOGD("Op %s in model %s begin to add task in op_mapping_info", op_desc->GetName().c_str(), dump_list_key.c_str()); | |||
aicpu::dump::Task task; | |||
auto op_desc = op_iter.op; | |||
task.set_end_graph(false); | |||
task.set_task_id(op_iter.task_id); | |||
task.set_stream_id(op_iter.stream_id); | |||
task.mutable_op()->set_op_name(op_desc->GetName()); | |||
task.mutable_op()->set_op_type(op_desc->GetType()); | |||
if (dump_properties_.GetDumpMode() == kDumpOutput) { | |||
if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput) { | |||
if (DumpOutput(op_iter, task) != SUCCESS) { | |||
GELOGE(FAILED, "Dump output failed"); | |||
return FAILED; | |||
@@ -595,7 +437,7 @@ Status DataDumper::LoadDumpInfo() { | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
continue; | |||
} | |||
if (dump_properties_.GetDumpMode() == kDumpInput) { | |||
if (PropertiesManager::Instance().GetDumpMode() == kDumpInput) { | |||
if (op_iter.is_task) { | |||
if (DumpInput(op_iter, task) != SUCCESS) { | |||
GELOGE(FAILED, "Dump input failed"); | |||
@@ -605,7 +447,7 @@ Status DataDumper::LoadDumpInfo() { | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
continue; | |||
} | |||
if (dump_properties_.GetDumpMode() == kDumpAll) { | |||
if (PropertiesManager::Instance().GetDumpMode() == kDumpAll) { | |||
auto ret = DumpOutput(op_iter, task); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Dump output failed when in dumping all"); | |||
@@ -625,22 +467,19 @@ Status DataDumper::LoadDumpInfo() { | |||
SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info); | |||
SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | |||
if (!op_list_.empty() || is_op_debug_) { | |||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Execute load dump info failed"); | |||
return FAILED; | |||
} | |||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Execute load dump info failed"); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||
aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput || | |||
dump_properties_.GetDumpMode() == kDumpAll) { | |||
if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput || | |||
PropertiesManager::Instance().GetDumpMode() == kDumpInput || | |||
PropertiesManager::Instance().GetDumpMode() == kDumpAll) { | |||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||
aicpu::dump::Task task; | |||
task.set_end_graph(true); | |||
@@ -652,37 +491,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||
} | |||
} | |||
void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, | |||
aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
if (is_op_debug_) { | |||
GELOGI("add op_debug_info to aicpu, task_id is %u, stream_id is %u", task_id, stream_id); | |||
aicpu::dump::Task task; | |||
task.set_end_graph(false); | |||
task.set_task_id(task_id); | |||
task.set_stream_id(stream_id); | |||
task.mutable_op()->set_op_name(NODE_NAME_OP_DEBUG); | |||
task.mutable_op()->set_op_type(OP_TYPE_OP_DEBUG); | |||
// set output | |||
aicpu::dump::Output output; | |||
output.set_data_type(DT_UINT8); | |||
output.set_format(FORMAT_ND); | |||
output.mutable_shape()->add_dim(kOpDebugShape); | |||
output.set_original_name(NODE_NAME_OP_DEBUG); | |||
output.set_original_output_index(0); | |||
output.set_original_output_format(FORMAT_ND); | |||
output.set_original_output_data_type(DT_UINT8); | |||
// due to lhisi virtual addr bug, cannot use args now | |||
output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr))); | |||
output.set_size(kOpDebugSize); | |||
task.mutable_output()->Add(std::move(output)); | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
} | |||
} | |||
Status DataDumper::UnloadDumpInfo() { | |||
if (!load_flag_) { | |||
GELOGI("No need to UnloadDumpInfo."); | |||
@@ -709,17 +517,15 @@ Status DataDumper::UnloadDumpInfo() { | |||
} | |||
void DataDumper::PrintCheckLog(string &dump_list_key) { | |||
std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | |||
std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel(); | |||
if (model_list.empty()) { | |||
GELOGI("No model need dump."); | |||
return; | |||
} | |||
GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str()); | |||
bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); | |||
bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); | |||
dump_list_key = not_find_by_omname ? model_name_ : om_name_; | |||
GELOGI("%zu op need dump in %s.", op_list_.size(), dump_list_key.c_str()); | |||
if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { | |||
if (not_find_by_omname && not_find_by_modelname) { | |||
std::string model_list_str; | |||
@@ -727,12 +533,12 @@ void DataDumper::PrintCheckLog(string &dump_list_key) { | |||
model_list_str += "[" + model + "]."; | |||
} | |||
GELOGW("Model %s will not be set to dump, dump list: %s", dump_list_key.c_str(), model_list_str.c_str()); | |||
GELOGW("Model %s will not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str()); | |||
return; | |||
} | |||
} | |||
std::set<std::string> config_dump_op_list = dump_properties_.GetPropertyValue(dump_list_key); | |||
dump_list_key = not_find_by_omname ? model_name_ : om_name_; | |||
std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key); | |||
std::set<std::string> dump_op_list; | |||
for (auto &inner_dump_info : op_list_) { | |||
// oplist value OpDescPtr is not nullptr | |||
@@ -23,9 +23,7 @@ | |||
#include <vector> | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "common/properties_manager.h" | |||
#include "graph/node.h" | |||
#include "graph/compute_graph.h" | |||
#include "proto/ge_ir.pb.h" | |||
#include "proto/op_mapping_info.pb.h" | |||
#include "runtime/mem.h" | |||
@@ -46,9 +44,7 @@ class DataDumper { | |||
device_id_(0), | |||
global_step_(0), | |||
loop_per_iter_(0), | |||
loop_cond_(0), | |||
compute_graph_(nullptr), | |||
ref_info_() {} | |||
loop_cond_(0) {} | |||
~DataDumper(); | |||
@@ -60,10 +56,6 @@ class DataDumper { | |||
void SetDeviceId(uint32_t device_id) { device_id_ = device_id; } | |||
void SetComputeGraph(const ComputeGraphPtr &compute_graph) { compute_graph_ = compute_graph; }; | |||
void SetRefInfo(const std::map<OpDescPtr, void *> &ref_info) { ref_info_ = ref_info; }; | |||
void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); | |||
void SaveDumpInput(const std::shared_ptr<Node> &node); | |||
@@ -73,15 +65,11 @@ class DataDumper { | |||
void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); | |||
void SetOmName(const std::string &om_name) { om_name_ = om_name; } | |||
void SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, bool is_op_debug); | |||
Status LoadDumpInfo(); | |||
Status UnloadDumpInfo(); | |||
void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; } | |||
const DumpProperties &GetDumpProperties() const { return dump_properties_; } | |||
private: | |||
void ReleaseDevMem(void **ptr) noexcept; | |||
@@ -109,32 +97,12 @@ class DataDumper { | |||
uintptr_t global_step_; | |||
uintptr_t loop_per_iter_; | |||
uintptr_t loop_cond_; | |||
ComputeGraphPtr compute_graph_; | |||
std::map<OpDescPtr, void *> ref_info_; | |||
uint32_t op_debug_task_id_ = 0; | |||
uint32_t op_debug_stream_id_ = 0; | |||
void *op_debug_addr_ = nullptr; | |||
bool is_op_debug_ = false; | |||
DumpProperties dump_properties_; | |||
Status DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | |||
Status DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output, size_t i, | |||
const std::string &node_name_index); | |||
Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | |||
Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); | |||
Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, | |||
const std::string &node_name_index); | |||
Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | |||
void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); | |||
void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, | |||
aicpu::dump::OpMappingInfo &op_mapping_info); | |||
Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); | |||
Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, size_t index); | |||
Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs, | |||
const uintptr_t &addr, size_t index); | |||
}; | |||
struct DataDumper::InnerDumpInfo { | |||
uint32_t task_id; | |||
@@ -29,7 +29,6 @@ | |||
#include "common/helper/om_file_helper.h" | |||
#include "common/opskernel/ge_task_info.h" | |||
#include "common/types.h" | |||
#include "common/properties_manager.h" | |||
#include "framework/common/util.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/load/new_model_manager/data_dumper.h" | |||
@@ -48,10 +47,6 @@ | |||
#include "task_info/task_info.h" | |||
namespace ge { | |||
// op debug need 2048 bits buffer | |||
const size_t kOpDebugMemorySize = 2048UL; | |||
const size_t kDebugP2pSize = 8UL; | |||
typedef enum tagModelProcStage { | |||
MODEL_LOAD_START = 1, | |||
MODEL_LOAD_END, | |||
@@ -176,6 +171,13 @@ class DavinciModel { | |||
// get session id | |||
uint64_t SessionId() const { return runtime_param_.session_id; } | |||
vector<OpDescPtr> GetOpDesc() { | |||
vector<OpDescPtr> opDescVector; | |||
GE_IF_BOOL_EXEC(AttrUtils::GetListOpDesc(GetGeModel(), MODEL_ATTR_FUSION_MODEL_DEF, opDescVector), | |||
GELOGI("get opDesc of opDescVector")); | |||
return opDescVector; | |||
} | |||
// get model priority | |||
int32_t Priority() const { return priority_; } | |||
@@ -246,9 +248,15 @@ class DavinciModel { | |||
/// | |||
Format GetFormat(); | |||
rtModel_t GetRtModelHandle() const { return rt_model_handle_; } | |||
rtModel_t GetRtModelHandle() { | |||
rtModel_t res = rt_model_handle_; | |||
return res; | |||
} | |||
rtStream_t GetRtModelStream() const { return rt_model_stream_; } | |||
rtStream_t GetRtModelStream() { | |||
rtModel_t res = rt_model_stream_; | |||
return res; | |||
} | |||
uint64_t GetRtBaseAddr() const { return runtime_param_.logic_mem_base; } | |||
@@ -287,7 +295,7 @@ class DavinciModel { | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info) const; | |||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info); | |||
void GetCurShape(std::vector<int64_t> &batch_info); | |||
@@ -336,9 +344,10 @@ class DavinciModel { | |||
/// | |||
/// @ingroup ge | |||
/// @brief dump all op input and output information | |||
/// @return void | |||
/// @param [in] op_list model_id | |||
/// @return Status | |||
/// | |||
void DumpOpInputOutput(); | |||
Status DumpOpInputOutput(); | |||
/// | |||
/// @ingroup ge | |||
@@ -394,9 +403,7 @@ class DavinciModel { | |||
/// | |||
uint32_t GetDeviceId() const { return device_id_; } | |||
bool NeedDestroyAicpuKernel() const { return need_destroy_aicpu_kernel_; } | |||
Status UpdateSessionId(uint64_t session_id); | |||
GeModelPtr GetGeModel() { return ge_model_; } | |||
const RuntimeParam &GetRuntimeParam() { return runtime_param_; } | |||
@@ -456,19 +463,6 @@ class DavinciModel { | |||
void *cur_args = static_cast<char *>(args_) + offset; | |||
return cur_args; | |||
} | |||
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | |||
int64_t GetFixedAddrsSize(string tensor_name); | |||
void *GetCurrentFixedAddr(int64_t offset) const { | |||
void *cur_addr = static_cast<char *>(fixed_addrs_) + offset; | |||
return cur_addr; | |||
} | |||
uint32_t GetFixedAddrOutputIndex(string tensor_name) { | |||
if (tensor_name_to_peer_output_index_.find(tensor_name) != tensor_name_to_peer_output_index_.end()) { | |||
return tensor_name_to_peer_output_index_[tensor_name]; | |||
} | |||
return UINT32_MAX; | |||
} | |||
void SetKnownNode(bool known_node) { known_node_ = known_node; } | |||
bool IsKnownNode() { return known_node_; } | |||
Status MallocKnownArgs(); | |||
@@ -483,9 +477,6 @@ class DavinciModel { | |||
// om file name | |||
void SetOmName(string om_name) { om_name_ = om_name; } | |||
void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } | |||
const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } | |||
private: | |||
// memory address of weights | |||
uint8_t *weights_mem_base_; | |||
@@ -502,6 +493,8 @@ class DavinciModel { | |||
struct timeInfo time_info_; | |||
int32_t dataInputTid; | |||
void InitZeroCopyUtil(bool is_dynamic_batch, bool &input_zero_copy, bool &output_zero_copy); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Save Batch label Info. | |||
@@ -537,13 +530,6 @@ class DavinciModel { | |||
/// | |||
bool CheckInputAndModelSize(const int64_t &input_size, const int64_t &op_size, bool is_dynamic); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Set copy only for No task feed NetOutput address. | |||
/// @return None. | |||
/// | |||
void SetCopyOnlyOutput(); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Copy Input/Output to model for direct use. | |||
@@ -569,10 +555,14 @@ class DavinciModel { | |||
Status CopyInputData(const InputData &input_data, bool device_data = false); | |||
Status CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind); | |||
Status CopyOutputData(uint32_t data_id, OutputData &output_data); | |||
Status CopyOutputDataToUser(OpDescPtr &op_desc, std::vector<DataBuffer> &blobs, uint32_t &data_index); | |||
Status SyncVarData(); | |||
Status SyncDataAndDump(); | |||
Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); | |||
void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | |||
@@ -599,12 +589,7 @@ class DavinciModel { | |||
bool IsAicpuKernelConnectSpecifiedLayer(); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Reduce memory usage after task sink. | |||
/// @return: void | |||
/// | |||
void Shrink(); | |||
Status MarkSpecifiedAicpuKernel(); | |||
/// | |||
/// @ingroup ge | |||
@@ -740,9 +725,10 @@ class DavinciModel { | |||
/// | |||
/// @ingroup ge | |||
/// @brief definiteness queue schedule, active original model stream. | |||
/// @param [in] streams: streams will active by S0. | |||
/// @return: 0 for success / others for fail | |||
/// | |||
Status CpuActiveStream(); | |||
Status CpuActiveStream(const std::vector<rtStream_t> &stream_list); | |||
/// | |||
/// @ingroup ge | |||
@@ -760,9 +746,6 @@ class DavinciModel { | |||
/// | |||
Status CpuModelRepeat(); | |||
Status InitEntryTask(); | |||
Status AddHeadStream(); | |||
/// | |||
/// @ingroup ge | |||
/// @brief set ts device. | |||
@@ -770,10 +753,6 @@ class DavinciModel { | |||
/// | |||
Status SetTSDevice(); | |||
Status OpDebugRegister(); | |||
void OpDebugUnRegister(); | |||
void CheckHasHcomOp(); | |||
Status DoTaskSink(); | |||
@@ -781,17 +760,17 @@ class DavinciModel { | |||
void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | |||
Status CopyVarData(ComputeGraphPtr &graph); | |||
// get desc info of graph for profiling | |||
Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info); | |||
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &compute_graph_desc_info); | |||
void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); | |||
void SetDataDumperArgs(); | |||
Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | |||
std::vector<ge::OutputTensorInfo> &outputs); | |||
void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); | |||
void GetFixedAddrAttr(const OpDescPtr &op_desc); | |||
bool is_model_has_inited_; | |||
uint32_t model_id_; | |||
@@ -804,9 +783,6 @@ class DavinciModel { | |||
uint32_t version_; | |||
GeModelPtr ge_model_; | |||
bool need_destroy_aicpu_kernel_{false}; | |||
vector<std::string> out_node_name_; | |||
map<uint32_t, OpDescPtr> op_list_; | |||
// data op_desc | |||
@@ -867,11 +843,6 @@ class DavinciModel { | |||
bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | |||
bool is_pure_head_stream_{false}; | |||
rtStream_t rt_head_stream_{nullptr}; | |||
rtStream_t rt_entry_stream_{nullptr}; | |||
rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED}; | |||
// ACL queue schedule, save queue ids for Init. | |||
std::vector<TaskInfoPtr> cpu_task_list_; | |||
std::vector<uint32_t> input_queue_ids_; // input queue ids created by caller. | |||
@@ -893,6 +864,8 @@ class DavinciModel { | |||
std::vector<rtStream_t> active_stream_list_; | |||
std::set<uint32_t> active_stream_indication_; | |||
std::shared_ptr<domi::ModelTaskDef> model_task_def_; | |||
std::set<uint32_t> aicpu_streams_; | |||
std::set<uint32_t> hcom_streams_; | |||
RuntimeParam runtime_param_; | |||
@@ -904,39 +877,22 @@ class DavinciModel { | |||
// for profiling task and graph info | |||
std::map<uint32_t, std::string> op_name_map_; | |||
std::vector<TaskDescInfo> task_desc_info_; | |||
ComputeGraphPtr compute_graph_; | |||
int64_t maxDumpOpNum_; | |||
// for data dump | |||
DataDumper data_dumper_; | |||
uint64_t iterator_count_; | |||
bool is_l1_fusion_enable_; | |||
std::map<OpDescPtr, void *> saved_task_addrs_; | |||
bool known_node_ = false; | |||
uint32_t total_args_size_ = 0; | |||
void *args_ = nullptr; | |||
void *args_host_ = nullptr; | |||
void *fixed_addrs_ = nullptr; | |||
int64_t total_fixed_addr_size_ = 0; | |||
std::map<const void *, void *> knonw_input_data_info_; | |||
std::map<const void *, void *> knonw_output_data_info_; | |||
vector<vector<int64_t>> batch_info_; | |||
vector<uint64_t> batch_size_; | |||
// key: input tensor name, generally rts op; | |||
// value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op | |||
std::map<string, int64_t> tensor_name_to_fixed_addr_size_; | |||
// key: input tensor name, generally rts op; value: the peer output anchor of the peer op | |||
std::map<string, int64_t> tensor_name_to_peer_output_index_; | |||
// if model is first execute | |||
bool is_first_execute_; | |||
// for op debug | |||
std::mutex debug_reg_mutex_; | |||
bool is_op_debug_reg_ = false; | |||
void *op_debug_addr_ = nullptr; | |||
void *p2p_debug_addr_ = nullptr; | |||
bool is_new_model_desc_{false}; | |||
}; | |||
} // namespace ge | |||
@@ -22,9 +22,8 @@ | |||
#include "common/profiling/profiling_manager.h" | |||
#include "common/properties_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/util.h" | |||
#include "graph/common/ge_call_wrapper.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "framework/common/util.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
#include "graph/load/new_model_manager/davinci_model_parser.h" | |||
#include "model/ge_root_model.h" | |||
@@ -34,10 +33,9 @@ thread_local uint32_t device_count = 0; | |||
namespace { | |||
const int kCmdParSize = 2; | |||
const int kDumpCmdPairSize = 2; | |||
const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; | |||
} // namespace | |||
DumpProperties ModelManager::dump_properties_; | |||
std::shared_ptr<ModelManager> ModelManager::GetInstance() { | |||
static const std::shared_ptr<ModelManager> instance_ptr = | |||
shared_ptr<ModelManager>(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); | |||
@@ -274,10 +272,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
davinci_model->SetId(model_id); | |||
davinci_model->SetDeviceId(GetContext().DeviceId()); | |||
const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(GetContext().SessionId()); | |||
davinci_model->SetDumpProperties(dump_properties); | |||
dump_properties_ = dump_properties; | |||
auto root_graph = ge_root_model->GetRootGraph(); | |||
GE_CHECK_NOTNULL(root_graph); | |||
string root_model_name = root_graph->GetName(); | |||
@@ -302,6 +296,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
if (davinci_model->SinkModelProfile() != SUCCESS) { | |||
GELOGW("Sink model profile failed."); | |||
} | |||
} | |||
} while (0); | |||
@@ -614,10 +611,10 @@ Status ModelManager::HandleDumpCommand(const Command &command) { | |||
GELOGE(PARAM_INVALID, "parser dump model failed"); | |||
return FAILED; | |||
} | |||
GELOGI("dump model = %s.", dump_model.c_str()); | |||
GELOGI("dump status = %s.", dump_model.c_str()); | |||
if (dump_status == "off" || dump_status == "OFF") { | |||
dump_properties_.DeletePropertyValue(dump_model); | |||
PropertiesManager::Instance().DeleteDumpPropertyValue(dump_model); | |||
return SUCCESS; | |||
} | |||
@@ -634,10 +631,9 @@ Status ModelManager::HandleDumpCommand(const Command &command) { | |||
return FAILED; | |||
} | |||
if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { | |||
dump_path = dump_path + "/"; | |||
dump_path = dump_path + "/" + CurrentTimeInStr() + "/"; | |||
} | |||
dump_path = dump_path + CurrentTimeInStr() + "/"; | |||
GELOGI("dump path = %s.", dump_path.c_str()); | |||
GELOGI("dump status = %s.", dump_path.c_str()); | |||
ret = ParserPara(command, DUMP_MODE, dump_mode); | |||
if (ret != SUCCESS) { | |||
@@ -646,10 +642,20 @@ Status ModelManager::HandleDumpCommand(const Command &command) { | |||
} | |||
GELOGI("dump mode = %s", dump_mode.c_str()); | |||
dump_properties_.AddPropertyValue(dump_model, dump_layers); | |||
dump_properties_.SetDumpPath(dump_path); | |||
dump_properties_.SetDumpMode(dump_mode); | |||
auto iter_dump_mode = std::find(command.cmd_params.begin(), command.cmd_params.end(), DUMP_MODE); | |||
if (iter_dump_mode != command.cmd_params.end()) { | |||
++iter_dump_mode; | |||
if (iter_dump_mode == command.cmd_params.end()) { | |||
GELOGE(PARAM_INVALID, "Invalid access."); | |||
return PARAM_INVALID; | |||
} | |||
dump_mode = *iter_dump_mode; | |||
GELOGI("dump mode = %s", dump_mode.c_str()); | |||
} | |||
PropertiesManager::Instance().AddDumpPropertyValue(dump_model, dump_layers); | |||
PropertiesManager::Instance().SetDumpOutputPath(dump_path); | |||
PropertiesManager::Instance().SetDumpMode(dump_mode); | |||
return SUCCESS; | |||
} | |||
@@ -765,6 +771,17 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { | |||
return SUCCESS; | |||
} | |||
Status ModelManager::UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id) { | |||
GeModelPtr ge_model_current = davinci_model->GetGeModel(); | |||
GE_CHECK_NOTNULL(ge_model_current); | |||
if (!ge::AttrUtils::SetInt(ge_model_current, ge::MODEL_ATTR_SESSION_ID, static_cast<int64_t>(session_id))) { | |||
GELOGW("Set attr[%s] failed in updating session_id.", MODEL_ATTR_SESSION_ID.c_str()); | |||
} | |||
GELOGD("Update session id: %lu.", session_id); | |||
return SUCCESS; | |||
} | |||
Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | |||
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, | |||
@@ -807,7 +824,6 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
} | |||
davinci_model->SetDeviceId(device_id); | |||
davinci_model->SetOmName(model.om_name); | |||
davinci_model->SetDumpProperties(dump_properties_); | |||
/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | |||
/// These session_ids come from the same model, so the values of session_id are the same. | |||
@@ -815,7 +831,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
uint64_t new_session_id; | |||
ret = GenSessionId(new_session_id); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed."); | |||
ret = davinci_model->UpdateSessionId(new_session_id); | |||
ret = UpdateSessionId(davinci_model, new_session_id); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed."); | |||
ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size); | |||
@@ -830,6 +846,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + | |||
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond | |||
davinci_model->SetProfileTime(MODEL_LOAD_END); | |||
if (davinci_model->SinkModelProfile() != SUCCESS) { | |||
GELOGW("Sink model profile failed."); | |||
} | |||
} | |||
GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++); | |||
@@ -879,7 +898,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||
uint64_t new_session_id; | |||
ret = GenSessionId(new_session_id); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); | |||
ret = davinci_model->UpdateSessionId(new_session_id); | |||
ret = UpdateSessionId(davinci_model, new_session_id); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); | |||
GenModelId(&model_id); | |||
@@ -890,8 +909,6 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||
return ret; | |||
} | |||
davinci_model->SetDumpProperties(dump_properties_); | |||
ret = davinci_model->Init(); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "init model failed."); | |||
@@ -918,8 +935,12 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); | |||
if (davinci_model->NeedDestroyAicpuKernel()) { | |||
GELOGI("Start to destroy specified aicpu kernel."); | |||
GeModelPtr ge_model_current = davinci_model->GetGeModel(); | |||
bool need_destroy_aicpu_kernel = false; | |||
bool result = ge::AttrUtils::GetBool(ge_model_current, kNeedDestroySpecifiedAicpuKernel, need_destroy_aicpu_kernel); | |||
if (result && need_destroy_aicpu_kernel) { | |||
GELOGI("Get attr %s successfully, start to destroy specified aicpu kernel.", kNeedDestroySpecifiedAicpuKernel); | |||
// Zero copy is enabled by default, no need to judge. | |||
uint64_t session_id_davinci = davinci_model->GetSessionId(); | |||
uint32_t model_id_davinci = davinci_model->GetModelId(); | |||
@@ -1029,19 +1050,4 @@ Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index | |||
return davinci_model->GetAllAippInputOutputDims(index, input_dims, output_dims); | |||
} | |||
bool ModelManager::IsDynamicShape(uint32_t model_id) { | |||
auto model = GetHybridModel(model_id); | |||
return model != nullptr; | |||
} | |||
ge::Status ModelManager::SyncExecuteModel(uint32_t model_id, const vector<GeTensor> &inputs, | |||
vector<GeTensor> &outputs) { | |||
auto model = GetHybridModel(model_id); | |||
if (model == nullptr) { | |||
GELOGE(FAILED, "Hybrid model not found. model id = %u.", model_id); | |||
return FAILED; | |||
} | |||
return model->Execute(inputs, outputs); | |||
} | |||
} // namespace ge |
@@ -31,7 +31,6 @@ | |||
#include "common/ge_types.h" | |||
#include "common/helper/model_helper.h" | |||
#include "common/helper/om_file_helper.h" | |||
#include "common/properties_manager.h" | |||
#include "common/types.h" | |||
#include "ge/ge_api_types.h" | |||
#include "graph/ge_context.h" | |||
@@ -142,8 +141,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | |||
OutputData &output_data); | |||
ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs); | |||
/// | |||
/// @ingroup domi_ome | |||
/// @brief model stop | |||
@@ -252,8 +249,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
std::vector<InputOutputDims> &output_dims); | |||
bool IsDynamicShape(uint32_t model_id); | |||
private: | |||
/// | |||
/// @ingroup domi_ome | |||
@@ -281,6 +276,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
ge::Status DeleteModel(uint32_t id); | |||
void GenModelId(uint32_t *id); | |||
ge::Status UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id); | |||
std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_; | |||
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; | |||
@@ -291,8 +287,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
std::mutex session_id_create_mutex_; | |||
uint64_t session_id_bias_; | |||
std::set<uint64_t> sess_ids_; | |||
static DumpProperties dump_properties_; | |||
}; | |||
} // namespace ge | |||
@@ -31,7 +31,7 @@ | |||
namespace ge { | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get input size. | |||
/// @return vector<uint32_t> | |||
/// | |||
@@ -43,26 +43,22 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | |||
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | |||
for (size_t i = 0; i < inputs_size; ++i) { | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
int64_t tensor_size = 0; | |||
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { | |||
// TBE: add weights size to input | |||
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); | |||
GeTensorDesc tensor_desc = op_desc->GetInputDesc(i); | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
if (tensor_size) { | |||
v_input_size.push_back(tensor_size); | |||
} | |||
continue; | |||
} | |||
int64_t tensor_size = 0; | |||
GE_IF_BOOL_EXEC( | |||
TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, | |||
TensorUtils::GetSize(op_desc->GetInputDesc(i), tensor_size) != GRAPH_SUCCESS, | |||
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); | |||
continue); | |||
continue;); | |||
v_input_size.push_back(tensor_size); | |||
} | |||
@@ -71,7 +67,7 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get output size. | |||
/// @return vector<uint32_t> | |||
/// | |||
@@ -86,17 +82,11 @@ vector<int64_t> ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { | |||
return v_output_size;); | |||
for (size_t i = 0; i < outputs_size; ++i) { | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
int64_t tensor_size = 0; | |||
GE_IF_BOOL_EXEC( | |||
TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, | |||
TensorUtils::GetSize(op_desc->GetOutputDesc(i), tensor_size) != GRAPH_SUCCESS, | |||
GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); | |||
continue); | |||
continue;); | |||
v_output_size.push_back(tensor_size); | |||
} | |||
@@ -105,7 +95,7 @@ vector<int64_t> ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get workspace size. | |||
/// @return vector<uint32_t> | |||
/// | |||
@@ -128,7 +118,7 @@ vector<int64_t> ModelUtils::GetWorkspaceSize(ConstOpDescPtr op_desc) { | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get weight size. | |||
/// @return vector<uint32_t> | |||
/// | |||
@@ -152,14 +142,8 @@ vector<int64_t> ModelUtils::GetWeightSize(ConstOpDescPtr op_desc) { | |||
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | |||
for (size_t i = 0; i < inputs_size; ++i) { | |||
if ((i < v_is_input_const.size()) && v_is_input_const[i]) { | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
int64_t tensor_size = 0; | |||
(void)TensorUtils::GetSize(*tensor_desc, tensor_size); | |||
(void)TensorUtils::GetSize(op_desc->GetInputDesc(i), tensor_size); | |||
v_weight_size.push_back(tensor_size); | |||
} | |||
} | |||
@@ -168,7 +152,7 @@ vector<int64_t> ModelUtils::GetWeightSize(ConstOpDescPtr op_desc) { | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get weights. | |||
/// @return vector<ConstGeTensorPtr> | |||
/// | |||
@@ -192,14 +176,9 @@ vector<ConstGeTensorPtr> ModelUtils::GetWeights(ConstOpDescPtr op_desc) { | |||
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | |||
for (size_t i = 0; i < inputs_size; ++i) { | |||
if ((i < v_is_input_const.size()) && v_is_input_const[i]) { | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
ConstGeTensorPtr weight = nullptr; | |||
if (AttrUtils::GetTensor(*tensor_desc, ATTR_NAME_WEIGHTS, weight)) { | |||
GeTensorDesc tensor_desc = op_desc->GetInputDesc(i); | |||
if (AttrUtils::GetTensor(tensor_desc, ATTR_NAME_WEIGHTS, weight)) { | |||
v_weights.push_back(weight); | |||
} | |||
} | |||
@@ -209,7 +188,7 @@ vector<ConstGeTensorPtr> ModelUtils::GetWeights(ConstOpDescPtr op_desc) { | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get AiCpuOp Input descriptor. | |||
/// @return vector<::tagCcAICPUTensor> | |||
/// | |||
@@ -226,25 +205,20 @@ vector<::tagCcAICPUTensor> ModelUtils::GetInputDescs(ConstOpDescPtr op_desc) { | |||
continue; | |||
} | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
uint32_t dim_cnt = 0; | |||
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(*tensor_desc, dim_cnt) == GRAPH_SUCCESS, continue, | |||
const auto &descriptor = op_desc->GetInputDesc(i); | |||
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(descriptor, dim_cnt) == GRAPH_SUCCESS, continue, | |||
"Get dim_cnt failed"); | |||
opTensor_t tmp; | |||
uint32_t tmp_fmt = tensor_desc->GetFormat(); | |||
uint32_t tmp_fmt = descriptor.GetFormat(); | |||
tmp.format = tagOpTensorFormat(tmp_fmt); | |||
tmp.dim_cnt = static_cast<int32_t>(dim_cnt); | |||
uint32_t tmp_type = tensor_desc->GetDataType(); | |||
uint32_t tmp_type = descriptor.GetDataType(); | |||
tmp.data_type = tagOpDataType(tmp_type); | |||
for (int32_t j = 0; j < 4; j++) { // 4 dims | |||
tmp.dim[j] = (j < tmp.dim_cnt ? tensor_desc->GetShape().GetDim(j) : 1); | |||
tmp.dim[j] = (j < tmp.dim_cnt ? descriptor.GetShape().GetDim(j) : 1); | |||
} | |||
v_input_descs.push_back(tmp); | |||
@@ -254,7 +228,7 @@ vector<::tagCcAICPUTensor> ModelUtils::GetInputDescs(ConstOpDescPtr op_desc) { | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get AiCpuOp Output descriptor. | |||
/// @return vector<::tagCcAICPUTensor> | |||
/// | |||
@@ -266,25 +240,20 @@ vector<::tagCcAICPUTensor> ModelUtils::GetOutputDescs(ConstOpDescPtr op_desc) { | |||
// init op output opTensor_t struct | |||
const size_t output_num = op_desc->GetOutputsSize(); | |||
for (size_t i = 0; i < output_num; ++i) { | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
uint32_t dim_cnt = 0; | |||
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(*tensor_desc, dim_cnt) == GRAPH_SUCCESS, continue, | |||
const auto &descriptor = op_desc->GetOutputDesc(i); | |||
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(descriptor, dim_cnt) == GRAPH_SUCCESS, continue, | |||
"Get dim_cnt failed"); | |||
opTensor_t tmp; | |||
uint32_t tmp_fmt = tensor_desc->GetFormat(); | |||
uint32_t tmp_fmt = descriptor.GetFormat(); | |||
tmp.format = tagOpTensorFormat(tmp_fmt); | |||
tmp.dim_cnt = static_cast<int32_t>(dim_cnt); | |||
uint32_t tmp_type = tensor_desc->GetDataType(); | |||
uint32_t tmp_type = descriptor.GetDataType(); | |||
tmp.data_type = tagOpDataType(tmp_type); | |||
for (int32_t j = 0; j < 4; j++) { // 4 dims | |||
tmp.dim[j] = (j < tmp.dim_cnt ? tensor_desc->GetShape().GetDim(j) : 1); | |||
tmp.dim[j] = (j < tmp.dim_cnt ? descriptor.GetShape().GetDim(j) : 1); | |||
} | |||
v_output_descs.push_back(tmp); | |||
@@ -294,14 +263,44 @@ vector<::tagCcAICPUTensor> ModelUtils::GetOutputDescs(ConstOpDescPtr op_desc) { | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get input data address. | |||
/// @return vector<void*> | |||
/// | |||
vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc) { | |||
vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc, | |||
bool need_convert) { | |||
vector<void *> v_input_data_addr; // init as:buf_base + op_def_->input(i)); | |||
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr); | |||
uint64_t session_id = model_param.session_id; | |||
uint8_t *mem_base = model_param.mem_base; | |||
uint8_t *var_base = model_param.var_base; | |||
uint8_t *weight_base = model_param.weight_base; | |||
const uint64_t logic_mem_base = 0; | |||
uint64_t logic_weight_base = 0; | |||
uint64_t logic_var_base = model_param.logic_var_base; | |||
uint64_t mem_size = model_param.mem_size; | |||
uint64_t weight_size = model_param.weight_size; | |||
uint64_t var_size = model_param.var_size; | |||
if (need_convert) { | |||
Status status = ConvertVirtualAddressToPhysical(mem_base, mem_size, mem_base); | |||
if (status != SUCCESS) { | |||
GELOGE(RT_FAILED, "Convert virtual address to physical for mem_base failed."); | |||
return v_input_data_addr; | |||
} | |||
status = ConvertVirtualAddressToPhysical(weight_base, weight_size, weight_base); | |||
if (status != SUCCESS) { | |||
GELOGE(RT_FAILED, "Convert virtual address to physical for weight_base failed."); | |||
return v_input_data_addr; | |||
} | |||
status = ConvertVirtualAddressToPhysical(var_base, var_size, var_base); | |||
if (status != SUCCESS) { | |||
GELOGE(RT_FAILED, "Convert virtual address to physical for var_base failed."); | |||
return v_input_data_addr; | |||
} | |||
} | |||
const size_t inputs_size = op_desc->GetInputsSize(); | |||
const vector<int64_t> v_input_offset = op_desc->GetInputOffset(); | |||
@@ -320,18 +319,13 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
for (size_t i = 0; i < inputs_size; ++i) { | |||
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { | |||
// TBE: add weights address to input | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
GeTensorDesc tensor_desc = op_desc->GetInputDesc(i); | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); | |||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
if (tensor_size) { | |||
int64_t data_offset = 0; | |||
GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset)); | |||
uint8_t *weight_addr = model_param.weight_base + data_offset; | |||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, data_offset)); | |||
uint8_t *weight_addr = static_cast<uint8_t *>(weight_base + data_offset - logic_weight_base); | |||
v_input_data_addr.push_back(weight_addr); | |||
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, | |||
op_desc->GetName().c_str(), i, weight_addr); | |||
@@ -346,13 +340,17 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
int64_t input_offset = v_input_offset[non_const_index]; | |||
non_const_index++; | |||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | |||
uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; | |||
GE_IF_BOOL_EXEC(var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | |||
uint8_t *variable_addr = var_base + input_offset - logic_var_base; | |||
v_input_data_addr.push_back(variable_addr); | |||
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | |||
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
continue); | |||
continue;); | |||
bool input_tensor = false; | |||
GE_IF_BOOL_EXEC(TensorUtils::GetInputTensor(op_desc->GetOutputDesc(i), input_tensor) != GRAPH_SUCCESS, | |||
GELOGW("get size from TensorDesc failed, op: %s, input index: %zu", op_desc->GetName().c_str(), i); | |||
continue;); | |||
// feature maps | |||
uint8_t *mem_addr = nullptr; | |||
// fusion | |||
@@ -360,7 +358,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset)); | |||
v_input_data_addr.push_back(mem_addr); | |||
} else { | |||
mem_addr = model_param.mem_base + input_offset; | |||
mem_addr = static_cast<uint8_t *>(mem_base + input_offset - logic_mem_base); | |||
v_input_data_addr.push_back(mem_addr); | |||
} | |||
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[F] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, | |||
@@ -371,20 +369,41 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get output data address. | |||
/// @return vector<void*> | |||
/// | |||
vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc) { | |||
vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc, | |||
bool need_convert) { | |||
vector<void *> v_output_data_addr; // init as:buf_base + op_def_->output(i) | |||
GE_CHECK_NOTNULL_EXEC(op_desc, return v_output_data_addr); | |||
uint64_t session_id = model_param.session_id; | |||
uint8_t *mem_base = model_param.mem_base; | |||
uint8_t *var_base = model_param.var_base; | |||
const uint64_t logic_mem_base = 0; | |||
uint64_t logic_var_base = model_param.logic_var_base; | |||
uint64_t mem_size = model_param.mem_size; | |||
uint64_t var_size = model_param.var_size; | |||
if (need_convert) { | |||
Status status = ConvertVirtualAddressToPhysical(mem_base, mem_size, mem_base); | |||
if (status != SUCCESS) { | |||
GELOGE(RT_FAILED, "Convert virtual address to physical for mem_base failed."); | |||
return v_output_data_addr; | |||
} | |||
status = ConvertVirtualAddressToPhysical(var_base, var_size, var_base); | |||
if (status != SUCCESS) { | |||
GELOGE(RT_FAILED, "Convert virtual address to physical for var_base failed."); | |||
return v_output_data_addr; | |||
} | |||
} | |||
const size_t outputs_size = op_desc->GetOutputsSize(); | |||
const vector<int64_t> v_output_offset = op_desc->GetOutputOffset(); | |||
GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size, | |||
GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); | |||
return v_output_data_addr); | |||
return v_output_data_addr;); | |||
vector<int64_t> v_memory_type; | |||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type); | |||
if (has_mem_type_attr && (v_memory_type.size() != outputs_size)) { | |||
@@ -394,12 +413,12 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||
return v_output_data_addr; | |||
} | |||
for (size_t i = 0; i < outputs_size; ++i) { | |||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||
uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; | |||
GE_IF_BOOL_EXEC(var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||
uint8_t *variable_addr = static_cast<uint8_t *>(var_base + v_output_offset[i] - logic_var_base); | |||
v_output_data_addr.push_back(variable_addr); | |||
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | |||
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||
continue); | |||
continue;); | |||
// feature maps | |||
uint8_t *mem_addr = nullptr; | |||
// fusion | |||
@@ -407,7 +426,7 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i])); | |||
v_output_data_addr.push_back(mem_addr); | |||
} else { | |||
mem_addr = static_cast<uint8_t *>(model_param.mem_base + v_output_offset[i]); | |||
mem_addr = static_cast<uint8_t *>(mem_base + v_output_offset[i] - logic_mem_base); | |||
v_output_data_addr.push_back(mem_addr); | |||
} | |||
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[F] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, | |||
@@ -417,13 +436,24 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get workspace data address. | |||
/// @return vector<void*> | |||
/// | |||
vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc) { | |||
vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc, | |||
bool need_convert) { | |||
vector<void *> v_workspace_data_addr; | |||
GE_CHECK_NOTNULL_EXEC(op_desc, return v_workspace_data_addr); | |||
uint8_t *mem_base = model_param.mem_base; | |||
uint64_t mem_size = model_param.mem_size; | |||
if (need_convert) { | |||
Status status = ConvertVirtualAddressToPhysical(mem_base, mem_size, mem_base); | |||
if (status != SUCCESS) { | |||
GELOGE(RT_FAILED, "Convert virtual address to physical for mem_base failed."); | |||
return v_workspace_data_addr; | |||
} | |||
} | |||
const vector<int64_t> v_workspace_offset = op_desc->GetWorkspace(); | |||
const vector<int64_t> v_workspace_bytes = op_desc->GetWorkspaceBytes(); | |||
@@ -436,13 +466,13 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param | |||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); | |||
for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { | |||
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { | |||
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); | |||
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(v_workspace_offset[i])); | |||
GELOGI("Fusion: op: %s, GetWorkspaceDataAddrs mem_addr[workspace index %zu]:%p", op_desc->GetName().c_str(), i, | |||
reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); | |||
} else { | |||
int64_t workspace_offset = v_workspace_offset[i]; | |||
int64_t workspace_bytes = v_workspace_bytes[i]; | |||
uint8_t *mem_addr = workspace_bytes == 0 ? nullptr : model_param.mem_base + workspace_offset; | |||
uint8_t *mem_addr = workspace_bytes == 0 ? nullptr : mem_base + workspace_offset; | |||
v_workspace_data_addr.push_back(mem_addr); | |||
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", | |||
model_param.graph_id, op_desc->GetName().c_str(), i, workspace_offset, workspace_bytes, mem_addr); | |||
@@ -452,32 +482,21 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param | |||
return v_workspace_data_addr; | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get runtime memory address. | |||
/// @return Status | |||
/// | |||
Status ModelUtils::GetRtAddress(const RuntimeParam ¶m, uintptr_t logic_addr, uint8_t *&mem_addr) { | |||
uint8_t *runtime_base_addr = nullptr; | |||
if ((param.logic_mem_base <= logic_addr) && (logic_addr < param.logic_mem_base + param.mem_size)) { | |||
runtime_base_addr = param.mem_base - param.logic_mem_base; | |||
GELOGI("The logic addr:0x%lx is data address, base:0x%lx, size:%lu", logic_addr, param.logic_mem_base, | |||
param.mem_size); | |||
} else if ((param.logic_weight_base <= logic_addr) && (logic_addr < param.logic_weight_base + param.weight_size)) { | |||
runtime_base_addr = param.weight_base - param.logic_weight_base; | |||
GELOGI("The logic addr:0x%lx is weight address, base:0x%lx, size:%lu", logic_addr, param.logic_weight_base, | |||
param.weight_size); | |||
} else if ((param.logic_var_base <= logic_addr) && (logic_addr < param.logic_var_base + param.var_size)) { | |||
runtime_base_addr = param.var_base - param.logic_var_base; | |||
GELOGI("The logic addr:0x%lx is variable address, base:0x%lx, size:%lu", logic_addr, param.logic_var_base, | |||
param.var_size); | |||
} else if (logic_addr != 0) { | |||
mem_addr = nullptr; | |||
GELOGE(PARAM_INVALID, "The logic addr:0x%lx is abnormal", logic_addr); | |||
return PARAM_INVALID; | |||
Status ModelUtils::ConvertVirtualAddressToPhysical(uint8_t *virtual_address, uint64_t size, | |||
uint8_t *&physical_address) { | |||
// Indicates whether use physical address. | |||
const char *use_physical_address = std::getenv("GE_USE_PHYSICAL_ADDRESS"); | |||
if (use_physical_address == nullptr || virtual_address == 0 || size == 0) { | |||
return SUCCESS; | |||
} | |||
rtError_t ret = rtKernelConfigTransArg(virtual_address, size, 0, reinterpret_cast<void **>(&physical_address)); | |||
if (ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtKernelConfigTransArg failed, ret: 0x%X", ret); | |||
return RT_FAILED; | |||
} | |||
mem_addr = runtime_base_addr + logic_addr; | |||
GELOGD("virtual_address=%p, physical_address=%p", virtual_address, physical_address); | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -34,79 +34,78 @@ class ModelUtils { | |||
~ModelUtils() = default; | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get input size. | |||
/// @return vector<uint32_t> | |||
/// | |||
static vector<int64_t> GetInputSize(ConstOpDescPtr op_desc); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get output size. | |||
/// @return vector<uint32_t> | |||
/// | |||
static vector<int64_t> GetOutputSize(ConstOpDescPtr op_desc); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get workspace size. | |||
/// @return vector<uint32_t> | |||
/// | |||
static vector<int64_t> GetWorkspaceSize(ConstOpDescPtr op_desc); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get weight size. | |||
/// @return vector<uint32_t> | |||
/// | |||
static vector<int64_t> GetWeightSize(ConstOpDescPtr op_desc); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get weights. | |||
/// @return vector<ConstGeTensorPtr> | |||
/// | |||
static vector<ConstGeTensorPtr> GetWeights(ConstOpDescPtr op_desc); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get AiCpuOp Input descriptor. | |||
/// @return vector<::tagCcAICPUTensor> | |||
/// | |||
static vector<::tagCcAICPUTensor> GetInputDescs(ConstOpDescPtr op_desc); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get AiCpuOp Output descriptor. | |||
/// @return vector<::tagCcAICPUTensor> | |||
/// | |||
static vector<::tagCcAICPUTensor> GetOutputDescs(ConstOpDescPtr op_desc); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get input data address. | |||
/// @return vector<void*> | |||
/// | |||
static vector<void *> GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc); | |||
static vector<void *> GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc, | |||
bool need_convert = true); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get output data address. | |||
/// @return vector<void*> | |||
/// | |||
static vector<void *> GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc); | |||
static vector<void *> GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc, | |||
bool need_convert = true); | |||
/// | |||
/// @ingroup ge | |||
/// @ingroup domi_ome | |||
/// @brief Get workspace data address. | |||
/// @return vector<void*> | |||
/// | |||
static vector<void *> GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc); | |||
static vector<void *> GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc, | |||
bool need_convert = true); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get memory runtime base. | |||
/// @return Status | |||
/// | |||
static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr); | |||
static ge::Status ConvertVirtualAddressToPhysical(uint8_t *virtual_address, uint64_t size, | |||
uint8_t *&physical_address); | |||
}; | |||
} // namespace ge | |||
@@ -45,7 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
Status EndGraphTaskInfo::Distribute() { | |||
GELOGI("EndGraphTaskInfo Distribute Start."); | |||
GE_CHECK_NOTNULL(davinci_model_); | |||
auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); | |||
auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); | |||
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || | |||
all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { | |||
@@ -80,4 +80,5 @@ Status EndGraphTaskInfo::Distribute() { | |||
} | |||
REGISTER_TASK_INFO(RT_MODEL_TASK_MODEL_END_GRAPH, EndGraphTaskInfo); | |||
} // namespace ge |
@@ -22,7 +22,7 @@ | |||
namespace ge { | |||
class EndGraphTaskInfo : public TaskInfo { | |||
public: | |||
EndGraphTaskInfo() {} | |||
EndGraphTaskInfo() : model_(0) {} | |||
~EndGraphTaskInfo() override { model_ = nullptr; } | |||
@@ -35,10 +35,10 @@ class EndGraphTaskInfo : public TaskInfo { | |||
uint32_t GetStreamId() override { return stream_id_; } | |||
private: | |||
rtModel_t model_{nullptr}; | |||
DavinciModel *davinci_model_{nullptr}; | |||
uint32_t task_id_{0}; | |||
uint32_t stream_id_{0}; | |||
rtModel_t model_; | |||
DavinciModel *davinci_model_; | |||
uint32_t task_id_; | |||
uint32_t stream_id_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ |
@@ -42,7 +42,6 @@ HcclTaskInfo::~HcclTaskInfo() { | |||
davinci_model_ = nullptr; | |||
ops_kernel_store_ = nullptr; | |||
max_node_of_hccl_stream_ = 0; | |||
args_ = nullptr; | |||
} | |||
Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GELOGI("HcclTaskInfo Init Start."); | |||
@@ -61,61 +60,54 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m | |||
GELOGI("HcclTaskInfo Init, op_index is: %u", op_index); | |||
// Get HCCL op | |||
op_desc_ = davinci_model->GetOpByIndex(op_index); | |||
GE_CHECK_NOTNULL(op_desc_); | |||
OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index); | |||
GE_CHECK_NOTNULL(op_desc); | |||
// Create the kernel hccl infos | |||
CreateKernelHcclInfo(op_desc_); | |||
CreateKernelHcclInfo(op_desc); | |||
// Initialize the hccl_type of all kernel hccl info | |||
HcomOmeUtil::GetHcclType(task_def, kernel_hccl_infos_); | |||
// Only in Horovod scenario should get the inputName and GeShape | |||
ret = HcomOmeUtil::GetHorovodInputs(op_desc_, kernel_hccl_infos_); | |||
ret = HcomOmeUtil::GetHorovodInputs(op_desc, kernel_hccl_infos_); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); | |||
return FAILED; | |||
} | |||
Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc_, kernel_hccl_infos_); | |||
Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc, kernel_hccl_infos_); | |||
if (dmrt != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); | |||
return FAILED; | |||
} | |||
dmrt = HcomOmeUtil::GetHcclCount(op_desc_, kernel_hccl_infos_); | |||
dmrt = HcomOmeUtil::GetHcclCount(op_desc, kernel_hccl_infos_); | |||
if (dmrt != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); | |||
return FAILED; | |||
} | |||
// Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId | |||
dmrt = HcomOmeUtil::GetAllRootId(op_desc_, kernel_hccl_infos_); | |||
dmrt = HcomOmeUtil::GetAllRootId(op_desc, kernel_hccl_infos_); | |||
if (dmrt != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: Get rootId fail! domi error: %u", dmrt); | |||
return FAILED; | |||
} | |||
// GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl | |||
ret = SetFollowStream(op_desc_, davinci_model); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "SetStream Fail."); | |||
return ret; | |||
} | |||
if (davinci_model_->IsKnownNode()) { | |||
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | |||
} | |||
ret = SetAddrs(op_desc_, kernel_hccl_infos_); | |||
ret = SetAddrs(op_desc, kernel_hccl_infos_); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Setaddrs Fail."); | |||
return ret; | |||
} | |||
// GE's new process: hccl declares the need for Workspace size, and GE allocates Workspace | |||
ret = SetWorkspace(op_desc_, kernel_hccl_infos_); | |||
ret = SetWorkspace(op_desc, kernel_hccl_infos_); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "SetWorkspace Fail."); | |||
return ret; | |||
} | |||
// GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl | |||
ret = SetFollowStream(op_desc, davinci_model); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "SetStream Fail."); | |||
return ret; | |||
} | |||
GELOGI("HcclTaskInfo Init Success"); | |||
return SUCCESS; | |||
@@ -217,83 +209,40 @@ Status HcclTaskInfo::Distribute() { | |||
GELOGI("HcclTaskInfo Distribute Success."); | |||
return SUCCESS; | |||
} | |||
Status HcclTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GE_CHECK_NOTNULL(davinci_model); | |||
auto hccl_def = task_def.kernel_hccl(); | |||
uint32_t op_index = hccl_def.op_index(); | |||
GELOGI("HcclTaskInfo Init, op_index is: %u", op_index); | |||
// Get HCCL op | |||
auto op_desc = davinci_model->GetOpByIndex(op_index); | |||
GE_CHECK_NOTNULL(op_desc); | |||
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
// Only need the number of addr to allocate args memory | |||
auto input_size = op_desc->GetInputsSize(); | |||
auto output_size = op_desc->GetOutputsSize(); | |||
auto workspace_size = op_desc->GetWorkspaceBytes().size(); | |||
uint32_t args_size = sizeof(void *) * (input_size + output_size + workspace_size); | |||
args_offset_ = davinci_model->GetTotalArgsSize(); | |||
davinci_model->SetTotalArgsSize(args_size); | |||
GELOGI("Calculate hccl task args , args_size %u, args_offset %u", args_size, args_offset_); | |||
return SUCCESS; | |||
} | |||
Status HcclTaskInfo::UpdateArgs() { | |||
GELOGI("HcclTaskInfo::UpdateArgs in."); | |||
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
input_data_addrs_ = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | |||
output_data_addrs_ = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | |||
workspace_data_addrs_ = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||
vector<void *> io_addrs; | |||
io_addrs.insert(io_addrs.end(), input_data_addrs_.begin(), input_data_addrs_.end()); | |||
io_addrs.insert(io_addrs.end(), output_data_addrs_.begin(), output_data_addrs_.end()); | |||
io_addrs.insert(io_addrs.end(), workspace_data_addrs_.begin(), workspace_data_addrs_.end()); | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||
GELOGI("HcclTaskInfo::UpdateArgs success."); | |||
return SUCCESS; | |||
} | |||
Status HcclTaskInfo::SetAddrs(const std::shared_ptr<OpDesc> &op_desc, | |||
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) { | |||
GE_CHECK_NOTNULL(op_desc); | |||
GE_CHK_STATUS_RET(HcomOmeUtil::CheckKernelHcclInfo(op_desc, kernel_hccl_infos), | |||
"HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid."); | |||
if (HcomOmeUtil::CheckKernelHcclInfo(op_desc, kernel_hccl_infos) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid."); | |||
return PARAM_INVALID; | |||
} | |||
GELOGI("Set hccl task input output address, node[%s}, type[%s] kernel_hccl_infos.size[%zu].", | |||
op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_hccl_infos.size()); | |||
if (op_desc->GetType() == HVDWAIT) { | |||
return SUCCESS; | |||
} | |||
domi::Status dmrt; | |||
hcclRedOp_t op_type = HCCL_REP_OP_SUM; | |||
GE_CHECK_NOTNULL(davinci_model_); | |||
GELOGI("Calc opType[%s] input address before. Node name[%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (!davinci_model_->IsKnownNode()) { | |||
input_data_addrs_ = ModelUtils::GetInputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); | |||
output_data_addrs_ = ModelUtils::GetOutputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); | |||
} | |||
void *input_data_addr = nullptr; | |||
void *output_data_addr = nullptr; | |||
auto input_data_addr_list = ModelUtils::GetInputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); | |||
auto output_data_addr_list = ModelUtils::GetOutputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); | |||
// initialize every kernel_hccl_info inputDataAddr | |||
for (size_t i = 0; i < kernel_hccl_infos.size(); i++) { | |||
std::string hccl_type = kernel_hccl_infos[i].hccl_type; | |||
if (davinci_model_->IsKnownNode()) { | |||
input_data_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(args_) + i); | |||
output_data_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(args_) + op_desc->GetInputsSize() + i); | |||
GELOGI("Hccl task info known input addr %p, output addr %p.", input_data_addr, output_data_addr); | |||
} else { | |||
input_data_addr = input_data_addrs_.empty() ? nullptr : input_data_addrs_[i]; | |||
output_data_addr = output_data_addrs_.empty() ? nullptr : output_data_addrs_[i]; | |||
} | |||
void *input_data_addr = input_data_addr_list.empty() ? nullptr : input_data_addr_list[i]; | |||
kernel_hccl_infos[i].inputDataAddr = input_data_addr; | |||
void *output_data_addr = output_data_addr_list.empty() ? nullptr : output_data_addr_list[i]; | |||
if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER) { | |||
kernel_hccl_infos[i].outputDataAddr = output_data_addr; | |||
} else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE) { | |||
GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclOperationType(op_desc, op_type), | |||
"davinci_model: GetHcomOperationType fail!"); | |||
dmrt = HcomOmeUtil::GetHcclOperationType(op_desc, op_type); | |||
if (dmrt != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: GetHcomOperationType fail! domi error: %u", dmrt); | |||
return FAILED; | |||
} | |||
kernel_hccl_infos[i].outputDataAddr = output_data_addr; | |||
kernel_hccl_infos[i].opType = op_type; | |||
} | |||
@@ -361,7 +310,6 @@ void HcclTaskInfo::CreateKernelHcclInfo(const ge::ConstOpDescPtr &op_desc) { | |||
Status HcclTaskInfo::SetWorkspace(const std::shared_ptr<OpDesc> &op_desc, | |||
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) { | |||
GE_CHECK_NOTNULL(op_desc); | |||
GE_CHECK_NOTNULL(davinci_model_); | |||
GELOGI("SetWorkspace Node[%s] opType[%s] set workspace.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||
uint64_t workspace_mem_size = 0; | |||
void *workspace_addr = nullptr; | |||
@@ -371,12 +319,11 @@ Status HcclTaskInfo::SetWorkspace(const std::shared_ptr<OpDesc> &op_desc, | |||
GELOGI("hccl need workSpaceMemSize=%lu", workspace_mem_size_tmp); | |||
if (workspace_mem_size_tmp != 0) { | |||
workspace_mem_size = workspace_mem_size_tmp; | |||
if (davinci_model_->IsKnownNode()) { | |||
workspace_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(args_) + op_desc->GetInputsSize() + | |||
op_desc->GetOutputsSize()); | |||
} else { | |||
workspace_data_addrs_ = ModelUtils::GetWorkspaceDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); | |||
workspace_addr = workspace_data_addrs_.empty() ? nullptr : workspace_data_addrs_[0]; | |||
vector<void *> workspace_data_addrs = | |||
ModelUtils::GetWorkspaceDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); | |||
if (!workspace_data_addrs.empty()) { | |||
GELOGI("Get workSpaceAddr"); | |||
workspace_addr = workspace_data_addrs[0]; | |||
} | |||
} | |||
} | |||
@@ -34,10 +34,7 @@ class HcclTaskInfo : public TaskInfo { | |||
hccl_stream_list_(), | |||
ops_kernel_store_(nullptr), | |||
private_def_(nullptr), | |||
private_def_len_(0), | |||
op_desc_(nullptr), | |||
args_(nullptr), | |||
args_offset_(0) {} | |||
private_def_len_(0) {} | |||
~HcclTaskInfo() override; | |||
@@ -47,10 +44,6 @@ class HcclTaskInfo : public TaskInfo { | |||
uint32_t GetTaskID() override { return id_; } | |||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
Status UpdateArgs() override; | |||
private: | |||
ge::Status SetAddrs(const std::string &hccl_type, const std::shared_ptr<OpDesc> &op); | |||
@@ -79,12 +72,6 @@ class HcclTaskInfo : public TaskInfo { | |||
static std::mutex hccl_follow_stream_mutex_; | |||
static uint32_t max_node_of_hccl_stream_; | |||
vector<GETaskKernelHcclInfo> kernel_hccl_infos_; | |||
vector<void *> input_data_addrs_; | |||
vector<void *> output_data_addrs_; | |||
vector<void *> workspace_data_addrs_; | |||
OpDescPtr op_desc_; | |||
void *args_; | |||
uint32_t args_offset_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_HCCL_TASK_INFO_H_ |
@@ -79,9 +79,6 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
return FAILED;) | |||
} | |||
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||
op_desc_->GetType().c_str(), ext_info.size(), ext_info_addr_); | |||
// 2.1 get loop cond variable for tensor array write | |||
uint64_t step_id_addr = 0; | |||
OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP); | |||
@@ -100,11 +97,6 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS, | |||
GELOGE(FAILED, "CreateAicpuKernel error."); | |||
return FAILED;) | |||
// 2.3 Create session | |||
GE_CHECK_NOTNULL(ModelManager::GetInstance()); | |||
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, | |||
GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); | |||
return FAILED;) | |||
kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL); | |||
if (davinci_model_->IsKnownNode()) { | |||
@@ -161,8 +153,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||
return FAILED;) | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = input_output_addr_; | |||
} | |||
@@ -175,7 +167,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); | |||
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_); | |||
// 4. Return result | |||
// 4. Create session | |||
GE_CHECK_NOTNULL(ModelManager::GetInstance()); | |||
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, | |||
GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); | |||
return FAILED;) | |||
// 5. Return result | |||
rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) | |||
@@ -183,7 +180,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) | |||
davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0); | |||
vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | |||
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false); | |||
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false); | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_in_addrs.begin(), virtual_in_addrs.end()); | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_out_addrs.begin(), virtual_out_addrs.end()); | |||
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0); | |||
GELOGI("KernelExTaskInfo Init Success. session id: %lu", session_id); | |||
return SUCCESS; | |||
@@ -205,55 +207,19 @@ Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciMod | |||
uint32_t mem_size = sizeof(uint64_t) * mem_length; | |||
davinci_model->SetTotalArgsSize(mem_size); | |||
GELOGI("kernel task name %s, args_size %u, args_offset %u", op_desc->GetName().c_str(), mem_size, args_offset_); | |||
// alloc fixed addr | |||
string peer_input_name; | |||
if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { | |||
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); | |||
if (output_index > outputs_size) { | |||
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", outputs_size, output_index); | |||
return FAILED; | |||
} | |||
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); | |||
auto tensor_desc = op_desc->GetOutputDesc(output_index); | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); | |||
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, | |||
fixed_addr_offset_); | |||
} | |||
return SUCCESS; | |||
} | |||
Status KernelExTaskInfo::UpdateArgs() { | |||
GELOGI("KernelExTaskInfo::UpdateArgs in."); | |||
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
vector<void *> io_addrs; | |||
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | |||
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | |||
vector<void *> io_addrs; | |||
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
} else { | |||
string peer_input_name; | |||
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { | |||
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); | |||
if (output_index > output_data_addrs.size()) { | |||
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", | |||
output_data_addrs.size(), output_index); | |||
return FAILED; | |||
} | |||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
for (size_t i = 0; i < output_data_addrs.size(); ++i) { | |||
if (i == output_index) { | |||
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||
io_addrs.emplace_back(fixed_addr); | |||
continue; | |||
} | |||
io_addrs.emplace_back(output_data_addrs[i]); | |||
} | |||
} | |||
} | |||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||
@@ -265,7 +231,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const | |||
const OpDescPtr &op_desc) { | |||
// Userspace copy need virtual address. | |||
const vector<int64_t> workspace_data_sizes = ModelUtils::GetWorkspaceSize(op_desc); | |||
const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc); | |||
const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc, false); | |||
if (workspace_data_addrs.empty() || workspace_data_sizes.empty()) { | |||
GELOGE(FAILED, "Node:%s invalid workspace, addrs is %zu, size is %zu.", op_desc->GetName().c_str(), | |||
workspace_data_addrs.size(), workspace_data_sizes.size()); | |||
@@ -54,7 +54,6 @@ class KernelExTaskInfo : public TaskInfo { | |||
auto ret = reinterpret_cast<uintptr_t>(dump_args_); | |||
return ret; | |||
} | |||
bool CallSaveDumpInfo() override { return true; }; | |||
private: | |||
Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); | |||
@@ -70,7 +69,6 @@ class KernelExTaskInfo : public TaskInfo { | |||
void *dump_args_; | |||
OpDescPtr op_desc_ = nullptr; | |||
uint32_t args_offset_ = 0; | |||
int64_t fixed_addr_offset_ = 0; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ |
@@ -47,16 +47,16 @@ const uint32_t kAddrLen = sizeof(void *); | |||
namespace ge { | |||
KernelTaskInfo::SuperKernelTaskInfo KernelTaskInfo::skt_info_ = { | |||
0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr}; | |||
0, 0, 0, 0, nullptr, nullptr, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr}; | |||
Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
if (davinci_model == nullptr) { | |||
GELOGE(PARAM_INVALID, "davinci model is null!"); | |||
GELOGE(PARAM_INVALID, "davinci_model is null!"); | |||
return PARAM_INVALID; | |||
} | |||
davinci_model_ = davinci_model; | |||
is_l1_fusion_enable_ = davinci_model_->GetL1FusionEnableOption(); | |||
GELOGD("KernelTaskInfo init start, ge.enableL1Fusion in davinci model is %d.", is_l1_fusion_enable_); | |||
GELOGD("KernelTaskInfo Init Start, ge.enableL1Fusion in davinci model is %d.", is_l1_fusion_enable_); | |||
Status ret = SetStream(task_def.stream_id(), davinci_model_->GetStreamList()); | |||
if (ret != SUCCESS) { | |||
@@ -73,7 +73,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
// get opdesc | |||
op_desc_ = davinci_model_->GetOpByIndex(context.op_index()); | |||
if (op_desc_ == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "Get op desc failed, index is out of range!"); | |||
GELOGE(INTERNAL_ERROR, "Get op_desc failed, index is out of range!"); | |||
return INTERNAL_ERROR; | |||
} | |||
(void)AttrUtils::GetBool(*op_desc_, ATTR_N_BATCH_SPILT, is_n_batch_spilt_); | |||
@@ -138,21 +138,14 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
ret = InitCceTask(kernel_def); | |||
} | |||
GELOGD("KernelTaskInfo init finish, result=%u.", ret); | |||
GELOGD("KernelTaskInfo Init finish, result=%u.", ret); | |||
return ret; | |||
} | |||
Status KernelTaskInfo::SaveSKTDumpInfo() { | |||
GE_CHECK_NOTNULL(davinci_model_); | |||
if (skt_dump_flag_ == RT_KERNEL_DEFAULT) { | |||
GELOGD("no need save skt dump info"); | |||
return SUCCESS; | |||
} | |||
// all op in super kernel share one taskid and streamid | |||
for (size_t i = 0; i < skt_info_.op_desc_list.size(); i++) { | |||
davinci_model_->SaveDumpTask(skt_info_.last_task_id, skt_info_.last_stream_id, skt_info_.op_desc_list[i], | |||
skt_info_.dump_args_list[i]); | |||
} | |||
davinci_model_->SaveDumpTask(skt_info_.last_task_id, skt_info_.last_stream_id, skt_info_.last_op, | |||
skt_info_.last_dump_args); | |||
return SUCCESS; | |||
} | |||
@@ -194,9 +187,6 @@ Status KernelTaskInfo::SKTFinalize() { | |||
GELOGI("SuperKernel Distribute [skt_id:%u]", skt_id_); | |||
skt_info_.kernel_list.clear(); | |||
skt_info_.arg_list.clear(); | |||
skt_info_.dump_flag_list.clear(); | |||
skt_info_.op_desc_list.clear(); | |||
skt_info_.dump_args_list.clear(); | |||
skt_info_.last_stream = nullptr; | |||
skt_info_.last_block_dim = 0; | |||
skt_info_.last_sm_desc = sm_desc_; | |||
@@ -207,15 +197,6 @@ Status KernelTaskInfo::SKTFinalize() { | |||
return SUCCESS; | |||
} | |||
uint32_t KernelTaskInfo::GetDumpFlag() { | |||
for (auto flag : skt_info_.dump_flag_list) { | |||
if (flag == RT_KERNEL_DUMPFLAG) { | |||
return RT_KERNEL_DUMPFLAG; | |||
} | |||
} | |||
return RT_KERNEL_DEFAULT; | |||
} | |||
Status KernelTaskInfo::SuperKernelLaunch() { | |||
if (skt_info_.kernel_list.empty()) { | |||
GELOGI("SuperKernelLaunch: Skt_kernel_list has no task, just return"); | |||
@@ -225,7 +206,7 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||
auto &skt_kernel_list = skt_info_.kernel_list; | |||
auto &skt_arg_list = skt_info_.arg_list; | |||
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%d] skt_arg_list[%d]", skt_kernel_list.size(), skt_arg_list.size()); | |||
if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) { | |||
if (skt_kernel_list.size() == kSKTSingleSize) { | |||
rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim), | |||
skt_info_.arg_list[0], skt_info_.last_args_size, | |||
static_cast<rtSmDesc_t *>(skt_info_.last_sm_desc), skt_info_.last_stream, | |||
@@ -234,7 +215,6 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||
GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
} | |||
call_save_dump_ = true; | |||
GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); | |||
return SUCCESS; | |||
} | |||
@@ -246,22 +226,18 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||
return RT_FAILED; | |||
} | |||
// Call the fuse API | |||
std::unique_ptr<skt::SuperKernel> superKernel = nullptr; | |||
skt::SuperKernel *superKernel = nullptr; | |||
if (factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel) != SUCCESS) { | |||
GELOGE(RT_FAILED, "SuperKernelLaunch: fuse call failed"); | |||
return RT_FAILED; | |||
} | |||
// Launch a super kernel | |||
skt_dump_flag_ = GetDumpFlag(); | |||
if (superKernel->Launch(skt_info_.last_stream, skt_dump_flag_) != SUCCESS) { | |||
if (superKernel->Launch(skt_info_.last_stream, RT_KERNEL_DUMPFLAG) != SUCCESS) { | |||
GELOGE(RT_FAILED, "SuperKernelLaunch: launch failed"); | |||
return RT_FAILED; | |||
} | |||
GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(), | |||
skt_arg_list.size()); | |||
// record skt addr for release | |||
superkernel_dev_nav_table_ = superKernel->GetNavTablePtr(); | |||
superkernel_device_args_addr_ = superKernel->GetDeviceArgsPtr(); | |||
GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); | |||
return SUCCESS; | |||
} | |||
@@ -274,9 +250,6 @@ Status KernelTaskInfo::SaveSuperKernelInfo() { | |||
skt_info_.last_args_size = args_size_; | |||
skt_info_.last_sm_desc = sm_desc_; | |||
skt_info_.last_dump_flag = dump_flag_; | |||
skt_info_.dump_flag_list.push_back(dump_flag_); | |||
skt_info_.op_desc_list.push_back(op_desc_); | |||
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(dump_args_)); | |||
skt_info_.last_group_key = group_key_; | |||
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(dump_args_); | |||
skt_info_.last_op = op_desc_; | |||
@@ -355,7 +328,6 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return FAILED; | |||
} | |||
call_save_dump_ = true; | |||
UpdateTaskId(); | |||
GELOGI("Current Common Task Distribute [taskid:%u]", task_id_); | |||
} else { | |||
@@ -384,7 +356,6 @@ Status KernelTaskInfo::Distribute() { | |||
rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()), | |||
reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_, | |||
nullptr, stream_, dump_flag_); | |||
call_save_dump_ = true; | |||
} else { | |||
/* default: not skt launch */ | |||
GELOGI( | |||
@@ -398,7 +369,6 @@ Status KernelTaskInfo::Distribute() { | |||
// call rtKernelLaunch for current task | |||
rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, static_cast<rtSmDesc_t *>(sm_desc_), | |||
stream_, dump_flag_); | |||
call_save_dump_ = true; | |||
} | |||
} | |||
if (rt_ret != RT_ERROR_NONE) { | |||
@@ -422,31 +392,9 @@ Status KernelTaskInfo::UpdateArgs() { | |||
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||
vector<void *> io_addrs; | |||
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
} else { | |||
string peer_input_name; | |||
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { | |||
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); | |||
if (output_index > output_data_addrs.size()) { | |||
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", | |||
output_data_addrs.size(), output_index); | |||
return FAILED; | |||
} | |||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
for (size_t i = 0; i < output_data_addrs.size(); ++i) { | |||
if (i == output_index) { | |||
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||
io_addrs.emplace_back(fixed_addr); | |||
continue; | |||
} | |||
io_addrs.emplace_back(output_data_addrs[i]); | |||
} | |||
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
} | |||
} | |||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||
@@ -460,8 +408,6 @@ Status KernelTaskInfo::Release() { | |||
return SUCCESS; | |||
} | |||
FreeRtMem(&args_); | |||
FreeRtMem(&superkernel_device_args_addr_); | |||
FreeRtMem(&superkernel_dev_nav_table_); | |||
FreeRtMem(&flowtable_); | |||
FreeRtMem(&custom_info_.input_descs); | |||
FreeRtMem(&custom_info_.input_addrs); | |||
@@ -526,29 +472,6 @@ Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel | |||
args_offset_ = davinci_model->GetTotalArgsSize(); | |||
davinci_model->SetTotalArgsSize(args_size); | |||
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||
// get opcontext stored in model | |||
const domi::KernelContext &context = kernel_def.context(); | |||
// get opdesc | |||
op_desc_ = davinci_model->GetOpByIndex(context.op_index()); | |||
GE_CHECK_NOTNULL(op_desc_); | |||
// alloc fixed addr | |||
string peer_input_name; | |||
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { | |||
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); | |||
if (output_index > op_desc_->GetOutputsSize()) { | |||
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), | |||
output_index); | |||
return FAILED; | |||
} | |||
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); | |||
auto tensor_desc = op_desc_->GetOutputDesc(output_index); | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); | |||
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, | |||
fixed_addr_offset_); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -626,8 +549,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
return FAILED; | |||
} | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = static_cast<char *>(args_) + offset; | |||
} | |||
@@ -638,8 +561,10 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
} | |||
vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false); | |||
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false); | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_in_addrs.begin(), virtual_in_addrs.end()); | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_out_addrs.begin(), virtual_out_addrs.end()); | |||
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); | |||
GELOGD("Do InitTVMTask end"); | |||
@@ -677,6 +602,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||
const std::vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | |||
Status ret = StoreInputOutputTensor(input_data_addrs, output_data_addrs, ModelUtils::GetInputDescs(op_desc), | |||
ModelUtils::GetOutputDescs(op_desc)); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "StoreInputOutputTensor Failed"); | |||
return ret; | |||
@@ -741,9 +667,11 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||
return RT_FAILED; | |||
} | |||
davinci_model_->SetZeroCopyAddr(op_desc, input_data_addrs, input_data_addrs.data(), custom_info_.input_addrs, | |||
input_data_addrs.size() * kAddrLen, 0); | |||
davinci_model_->SetZeroCopyAddr(op_desc, output_data_addrs, output_data_addrs.data(), custom_info_.output_addrs, | |||
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false); | |||
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false); | |||
davinci_model_->SetZeroCopyAddr(op_desc, virtual_in_addrs, input_data_addrs.data(), custom_info_.input_addrs, | |||
virtual_in_addrs.size() * kAddrLen, 0); | |||
davinci_model_->SetZeroCopyAddr(op_desc, virtual_out_addrs, output_data_addrs.data(), custom_info_.output_addrs, | |||
output_data_addrs.size() * kAddrLen, 0); | |||
return SUCCESS; | |||
} | |||
@@ -873,9 +801,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||
return init_ret; | |||
} | |||
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||
aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size()); | |||
@@ -894,13 +819,19 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
return RT_FAILED; | |||
} | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | |||
} | |||
davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, args_addr.get(), args_, args_size_, sizeof(aicpu::AicpuParamHead)); | |||
vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | |||
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false); | |||
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false); | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_in_addrs.begin(), virtual_in_addrs.end()); | |||
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_out_addrs.begin(), virtual_out_addrs.end()); | |||
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_, | |||
sizeof(aicpu::AicpuParamHead)); | |||
return SUCCESS; | |||
} | |||
@@ -61,8 +61,6 @@ class KernelTaskInfo : public TaskInfo { | |||
sm_desc_ = nullptr; | |||
flowtable_ = nullptr; | |||
args_ = nullptr; | |||
superkernel_device_args_addr_ = nullptr; | |||
superkernel_dev_nav_table_ = nullptr; | |||
} | |||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
@@ -90,8 +88,6 @@ class KernelTaskInfo : public TaskInfo { | |||
uint32_t GetSktTaskID() override { return skt_id_; } | |||
bool CallSaveDumpInfo() override { return call_save_dump_; }; | |||
cce::ccOpContext ctx_; | |||
FusionOpInfo fusion_op_info_; | |||
@@ -134,7 +130,6 @@ class KernelTaskInfo : public TaskInfo { | |||
void UpdateSKTTaskId(); | |||
Status SKTFinalize(); | |||
Status SuperKernelLaunch(); | |||
uint32_t GetDumpFlag(); | |||
Status SaveSuperKernelInfo(); | |||
bool IsMarkedLastNode(); | |||
bool IsMarkedFirstNode(); | |||
@@ -158,8 +153,6 @@ class KernelTaskInfo : public TaskInfo { | |||
OpDescPtr op_desc_; | |||
DavinciModel *davinci_model_; | |||
uint32_t args_offset_ = 0; | |||
int64_t fixed_addr_offset_ = 0; | |||
bool call_save_dump_ = false; | |||
// aicpu ext_info device mem | |||
void *aicpu_ext_info_addr_ = nullptr; | |||
@@ -171,9 +164,6 @@ class KernelTaskInfo : public TaskInfo { | |||
bool is_n_batch_spilt_; | |||
int64_t group_key_; | |||
bool has_group_key_; | |||
uint32_t skt_dump_flag_ = RT_KERNEL_DEFAULT; | |||
void *superkernel_device_args_addr_ = nullptr; | |||
void *superkernel_dev_nav_table_ = nullptr; | |||
struct AICPUCustomInfo { | |||
void *input_descs = nullptr; | |||
@@ -193,9 +183,6 @@ class KernelTaskInfo : public TaskInfo { | |||
void *last_sm_desc; | |||
std::vector<void *> kernel_list; | |||
std::vector<void *> arg_list; | |||
std::vector<uint32_t> dump_flag_list; | |||
std::vector<OpDescPtr> op_desc_list; | |||
std::vector<uintptr_t> dump_args_list; | |||
uint32_t last_dump_flag; | |||
int64_t last_group_key; | |||
uintptr_t last_dump_args; | |||
@@ -16,8 +16,8 @@ | |||
#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
namespace ge { | |||
constexpr uint8_t kLabelSwitchIndexNum = 1; | |||
@@ -59,13 +59,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||
op_desc->GetName().c_str(), input_data_addr.size(), kLabelSwitchIndexNum); | |||
return INTERNAL_ERROR; | |||
} | |||
if (davinci_model->IsKnownNode()) { | |||
index_value_ = davinci_model->GetCurrentFixedAddr(fixed_addr_offset_); | |||
} else { | |||
index_value_ = input_data_addr[0]; | |||
} | |||
index_value_ = input_data_addr[0]; | |||
davinci_model->DisableZeroCopy(index_value_); | |||
std::vector<uint32_t> label_idx_list; | |||
@@ -130,28 +124,5 @@ Status LabelSwitchByIndexTaskInfo::Distribute() { | |||
return SUCCESS; | |||
} | |||
Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GE_CHECK_NOTNULL(davinci_model); | |||
auto label_switch = task_def.label_switch_by_index(); | |||
uint32_t op_index = label_switch.op_index(); | |||
GELOGI("Begin to calculate args, op_index is: %u", op_index); | |||
auto op_desc = davinci_model->GetOpByIndex(op_index); | |||
GE_CHECK_NOTNULL(op_desc); | |||
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (op_desc->GetInputsSize() != kLabelSwitchIndexNum) { | |||
GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); | |||
return FAILED; | |||
} | |||
string input_tensor_name = op_desc->GetInputNameByIndex(0); | |||
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name); | |||
auto tensor_desc = op_desc->GetInputDesc(0); | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size); | |||
GELOGI("Calculate stream switchn task args , tensor_size %ld, fixed_addr_offset %ld", tensor_size, | |||
fixed_addr_offset_); | |||
return SUCCESS; | |||
} | |||
REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, LabelSwitchByIndexTaskInfo); | |||
} // namespace ge |
@@ -22,8 +22,7 @@ | |||
namespace ge { | |||
class LabelSwitchByIndexTaskInfo : public TaskInfo { | |||
public: | |||
LabelSwitchByIndexTaskInfo() | |||
: index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0), fixed_addr_offset_(0) {} | |||
LabelSwitchByIndexTaskInfo() : index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0) {} | |||
~LabelSwitchByIndexTaskInfo() override; | |||
@@ -31,15 +30,13 @@ class LabelSwitchByIndexTaskInfo : public TaskInfo { | |||
Status Distribute() override; | |||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
private: | |||
void *index_value_; // switch index input. | |||
uint32_t branch_max_; // max branch count. | |||
void *args_; // label info memory. | |||
uint32_t args_size_; // label info length. | |||
std::vector<rtLabel_t> label_list_; | |||
int64_t fixed_addr_offset_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ |
@@ -21,9 +21,9 @@ | |||
namespace ge { | |||
Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GELOGI("MemcpyAddrAsyncTaskInfo Init Start"); | |||
GELOGI("MemcpyAddrAsyncTaskInfo Init Start."); | |||
if (davinci_model == nullptr) { | |||
GELOGE(PARAM_INVALID, "davinci_model is null"); | |||
GELOGE(PARAM_INVALID, "davinci_model is null!"); | |||
return PARAM_INVALID; | |||
} | |||
@@ -32,27 +32,45 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel | |||
return ret; | |||
} | |||
const auto &memcpy_async = task_def.memcpy_async(); | |||
OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index()); | |||
auto memcpy_async_def = task_def.memcpy_async(); | |||
uint32_t op_index = memcpy_async_def.op_index(); | |||
OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index); | |||
if (op_desc == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index()); | |||
GELOGE(INTERNAL_ERROR, "Init MemcpyAddrAsyncTaskInfo error, index is out of range!"); | |||
return INTERNAL_ERROR; | |||
} | |||
ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_); | |||
uint64_t logic_dst = memcpy_async_def.dst(); | |||
uint64_t logic_src = memcpy_async_def.src(); | |||
dst_max_ = memcpy_async_def.dst_max(); | |||
uint64_t update_base_addr = 0; | |||
ret = GetUpdateBaseAddr(davinci_model, logic_src, update_base_addr); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
src_ = reinterpret_cast<uint8_t *>(update_base_addr + logic_src); | |||
if (src_ == nullptr) { | |||
GELOGE(PARAM_INVALID, "src_ is null!"); | |||
return PARAM_INVALID; | |||
} | |||
ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
uint64_t mem_base = reinterpret_cast<uint64_t>(davinci_model->MemBase()); | |||
uint64_t logic_mem_base = davinci_model->GetRtBaseAddr(); | |||
dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_base + (logic_dst - logic_mem_base))); | |||
if (dst_ == nullptr) { | |||
GELOGE(PARAM_INVALID, "dst_ is null!"); | |||
return PARAM_INVALID; | |||
} | |||
vector<void *> io_addrs; | |||
io_addrs.emplace_back(src_); | |||
io_addrs.emplace_back(dst_); | |||
count_ = memcpy_async_def.count(); | |||
kind_ = memcpy_async_def.kind(); | |||
// malloc args memory | |||
size_t args_size = sizeof(void *) * io_addrs.size(); | |||
rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); | |||
@@ -70,18 +88,20 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel | |||
return RT_FAILED; | |||
} | |||
count_ = memcpy_async.count(); | |||
kind_ = memcpy_async.kind(); | |||
dst_max_ = memcpy_async.dst_max(); | |||
GELOGI("InitMemcpyAddrAsyncTaskInfo, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu, args:%p, size:%zu", | |||
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_, args_size); | |||
// Just dest addr need zero copy. | |||
davinci_model->SetZeroCopyAddr(op_desc, {dst_}, io_addrs.data(), args_, args_size, sizeof(void *)); | |||
GELOGI("InitMemcpyAddrAsyncTaskInfo, logic_src:%p, logic_dst:%p, src:%p, dst:%p, src_args:%p, dst_args:%p", | |||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_src)), | |||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_dst)), src_, dst_, args_, | |||
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + args_size)); | |||
davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_, args_size, 0); | |||
return SUCCESS; | |||
} | |||
Status MemcpyAddrAsyncTaskInfo::Distribute() { | |||
GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start, dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_); | |||
GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start."); | |||
GELOGI("Distribute MemcpyAddrAsync, dst_max:%lu, count:%lu, kind:%u.", dst_max_, count_, kind_); | |||
rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + sizeof(void *)), | |||
dst_max_, args_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | |||
@@ -93,5 +113,39 @@ Status MemcpyAddrAsyncTaskInfo::Distribute() { | |||
return SUCCESS; | |||
} | |||
Status MemcpyAddrAsyncTaskInfo::GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr, | |||
uint64_t &base_addr) { | |||
GE_CHECK_NOTNULL(davinci_model); | |||
uint64_t data_base_addr = | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->MemBase())) - davinci_model->GetRtBaseAddr(); | |||
uint64_t weight_base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->WeightsMemBase())) - | |||
davinci_model->GetRtWeightAddr(); | |||
uint64_t var_base_addr = | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->VarMemBase())) - davinci_model->GetRtVarAddr(); | |||
uint64_t data_base_addr_start = davinci_model->GetRtBaseAddr(); | |||
uint64_t data_base_addr_end = davinci_model->GetRtBaseAddr() + davinci_model->TotalMemSize(); | |||
uint64_t wight_base_addr_start = davinci_model->GetRtWeightAddr(); | |||
uint64_t wight_base_addr_end = davinci_model->GetRtWeightAddr() + davinci_model->TotalWeightsMemSize(); | |||
uint64_t varible_base_addr_start = davinci_model->GetRtVarAddr(); | |||
uint64_t varible_base_addr_end = davinci_model->GetRtVarAddr() + davinci_model->TotalVarMemSize(); | |||
if ((data_base_addr_start <= update_addr) && (update_addr <= data_base_addr_end)) { | |||
base_addr = data_base_addr; | |||
GELOGI("The update_addr is data address."); | |||
} else if ((wight_base_addr_start <= update_addr) && (update_addr <= wight_base_addr_end)) { | |||
base_addr = weight_base_addr; | |||
GELOGI("The update_addr is weight address."); | |||
} else if ((varible_base_addr_start <= update_addr) && (update_addr <= varible_base_addr_end)) { | |||
base_addr = var_base_addr; | |||
GELOGI("The update_addr is variable address."); | |||
} else if (update_addr != 0) { | |||
base_addr = 0; | |||
GELOGE(PARAM_INVALID, "The update_addr is abnormal."); | |||
return PARAM_INVALID; | |||
} | |||
return SUCCESS; | |||
} | |||
REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ADDR_ASYNC, MemcpyAddrAsyncTaskInfo); | |||
} // namespace ge |
@@ -16,7 +16,6 @@ | |||
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ | |||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ | |||
#include "graph/load/new_model_manager/task_info/task_info.h" | |||
namespace ge { | |||
@@ -33,8 +32,9 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo { | |||
if (ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); | |||
} | |||
args_ = nullptr; | |||
} | |||
args_ = nullptr; | |||
} | |||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
@@ -42,9 +42,11 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo { | |||
Status Distribute() override; | |||
private: | |||
uint8_t *dst_; | |||
Status GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr, uint64_t &base_addr); | |||
void *dst_; | |||
uint64_t dst_max_; | |||
uint8_t *src_; | |||
void *src_; | |||
void *args_; | |||
uint64_t count_; | |||
uint32_t kind_; | |||
@@ -21,9 +21,9 @@ | |||
namespace ge { | |||
Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GELOGI("MemcpyAsyncTaskInfo Init Start"); | |||
GELOGI("MemcpyAsyncTaskInfo Init Start."); | |||
if (davinci_model == nullptr) { | |||
GELOGE(PARAM_INVALID, "davinci_model is null"); | |||
GELOGE(PARAM_INVALID, "davinci_model is null!"); | |||
return PARAM_INVALID; | |||
} | |||
@@ -32,38 +32,35 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
return ret; | |||
} | |||
memcpy_async = task_def.memcpy_async(); | |||
count_ = memcpy_async.count(); | |||
kind_ = memcpy_async.kind(); | |||
dst_max_ = memcpy_async.dst_max(); | |||
if (davinci_model->IsKnownNode()) { | |||
src_ = reinterpret_cast<uint8_t *>(davinci_model_->GetCurrentArgsAddr(args_offset_)); | |||
dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *)); | |||
// for zero copy | |||
kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE; | |||
GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_); | |||
return SUCCESS; | |||
} | |||
ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
auto memcpy_async_def = task_def.memcpy_async(); | |||
uint64_t logic_dst = memcpy_async_def.dst(); | |||
uint64_t logic_src = memcpy_async_def.src(); | |||
dst_max_ = memcpy_async_def.dst_max(); | |||
ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); | |||
uint64_t update_base_addr = 0; | |||
ret = GetUpdateBaseAddr(davinci_model, logic_src, update_base_addr); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
src_ = reinterpret_cast<uint8_t *>(update_base_addr + logic_src); | |||
davinci_model->DisableZeroCopy(src_); | |||
GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu", | |||
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_); | |||
uint64_t mem_base = reinterpret_cast<uint64_t>(davinci_model->MemBase()); | |||
uint64_t logic_mem_base = davinci_model->GetRtBaseAddr(); | |||
dst_ = reinterpret_cast<uint8_t *>(mem_base + (logic_dst - logic_mem_base)); | |||
count_ = memcpy_async_def.count(); | |||
kind_ = memcpy_async_def.kind(); | |||
GELOGI("MemcpyAsyncTaskInfo Init Success, logic_src:%p, logic_dst:%p, src:%p, dst:%p", | |||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_src)), | |||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_dst)), src_, dst_); | |||
davinci_model->DisableZeroCopy(src_); | |||
davinci_model->DisableZeroCopy(dst_); | |||
return SUCCESS; | |||
} | |||
Status MemcpyAsyncTaskInfo::Distribute() { | |||
GELOGI("MemcpyAsyncTaskInfo Distribute Start. dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_); | |||
GELOGI("MemcpyAsyncTaskInfo Distribute Start. dst_max:%lu, count:%lu, kind:%u.", dst_max_, count_, kind_); | |||
rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
@@ -71,41 +68,40 @@ Status MemcpyAsyncTaskInfo::Distribute() { | |||
return RT_FAILED; | |||
} | |||
GELOGI("MemcpyAsyncTaskInfo Distribute Success"); | |||
return SUCCESS; | |||
} | |||
Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
// the num of src and dst size is 2 | |||
uint32_t args_size = sizeof(void *) * 2; | |||
args_offset_ = davinci_model->GetTotalArgsSize(); | |||
davinci_model->SetTotalArgsSize(args_size); | |||
davinci_model_ = davinci_model; | |||
GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_); | |||
GELOGI("MemcpyAsyncTaskInfo Distribute Success."); | |||
return SUCCESS; | |||
} | |||
Status MemcpyAsyncTaskInfo::UpdateArgs() { | |||
GELOGI("MemcpyAsyncTaskInfo::UpdateArgs in."); | |||
GE_CHECK_NOTNULL(davinci_model_); | |||
Status ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.src(), src_); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.dst(), dst_); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
Status MemcpyAsyncTaskInfo::GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr, uint64_t &base_addr) { | |||
GE_CHECK_NOTNULL(davinci_model); | |||
uint64_t data_base_addr = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->MemBase())) - davinci_model->GetRtBaseAddr(); | |||
uint64_t weight_base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->WeightsMemBase())) - | |||
davinci_model->GetRtWeightAddr(); | |||
uint64_t var_base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->VarMemBase())) - | |||
davinci_model->GetRtVarAddr(); | |||
uint64_t data_base_addr_start = davinci_model->GetRtBaseAddr(); | |||
uint64_t data_base_addr_end = davinci_model->GetRtBaseAddr() + davinci_model->TotalMemSize(); | |||
uint64_t wight_base_addr_start = davinci_model->GetRtWeightAddr(); | |||
uint64_t wight_base_addr_end = davinci_model->GetRtWeightAddr() + davinci_model->TotalWeightsMemSize(); | |||
uint64_t varible_base_addr_start = davinci_model->GetRtVarAddr(); | |||
uint64_t varible_base_addr_end = davinci_model->GetRtVarAddr() + davinci_model->TotalVarMemSize(); | |||
if ((data_base_addr_start <= update_addr) && (update_addr <= data_base_addr_end)) { | |||
base_addr = data_base_addr; | |||
GELOGI("The update_addr is data address."); | |||
} else if ((wight_base_addr_start <= update_addr) && (update_addr <= wight_base_addr_end)) { | |||
base_addr = weight_base_addr; | |||
GELOGI("The update_addr is weight address."); | |||
} else if ((varible_base_addr_start <= update_addr) && (update_addr <= varible_base_addr_end)) { | |||
base_addr = var_base_addr; | |||
GELOGI("The update_addr is variable address."); | |||
} else if (update_addr != 0) { | |||
base_addr = 0; | |||
GELOGE(PARAM_INVALID, "The update_addr is abnormal."); | |||
return PARAM_INVALID; | |||
} | |||
vector<void *> io_addrs; | |||
io_addrs.emplace_back(reinterpret_cast<void *>(src_)); | |||
io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update memcpyasync in known node zero copy addr failed."); | |||
GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); | |||
return SUCCESS; | |||
} | |||
@@ -16,7 +16,6 @@ | |||
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ | |||
#include "graph/load/new_model_manager/task_info/task_info.h" | |||
namespace ge { | |||
@@ -33,19 +32,14 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||
Status Distribute() override; | |||
Status UpdateArgs() override; | |||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
private: | |||
uint8_t *dst_; | |||
Status GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr, uint64_t &base_addr); | |||
void *dst_; | |||
uint64_t dst_max_; | |||
uint8_t *src_; | |||
void *src_; | |||
uint64_t count_; | |||
uint32_t kind_; | |||
DavinciModel *davinci_model_ = nullptr; | |||
uint32_t args_offset_ = 0; | |||
domi::MemcpyAsyncDef memcpy_async; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ |
@@ -42,11 +42,16 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d | |||
auto stream_switch_def = task_def.stream_switch(); | |||
uint32_t op_index = stream_switch_def.op_index(); | |||
// get StreamSwitch op | |||
OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index); | |||
GE_CHECK_NOTNULL(op_desc); | |||
auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); | |||
SetInputAndValuePtr(davinci_model, input_data_addr); | |||
if (!input_data_addr.empty() && input_data_addr.size() >= STREAM_SWITCH_INPUT_NUM) { | |||
input_ptr_ = input_data_addr[0]; | |||
value_ptr_ = input_data_addr[1]; | |||
} | |||
uint32_t cond = 0; | |||
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_STREAM_SWITCH_COND, cond)) { | |||
GELOGE(INTERNAL_ERROR, "StreamSwitchOp get attr STREAM_SWITCH_COND fail."); | |||
@@ -110,42 +115,6 @@ Status StreamSwitchTaskInfo::Distribute() { | |||
GELOGI("StreamSwitchTaskInfo Distribute Success. cond:%d, stream:%p, datatype:%d.", cond_, true_stream_, data_type_); | |||
return SUCCESS; | |||
} | |||
Status StreamSwitchTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GE_CHECK_NOTNULL(davinci_model); | |||
auto stream_switch_def = task_def.stream_switch(); | |||
uint32_t op_index = stream_switch_def.op_index(); | |||
GELOGI("Begin to calculate args, op_index is: %u", op_index); | |||
auto op_desc = davinci_model->GetOpByIndex(op_index); | |||
GE_CHECK_NOTNULL(op_desc); | |||
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (op_desc->GetInputsSize() != STREAM_SWITCH_INPUT_NUM) { | |||
GELOGE(FAILED, "Stream switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); | |||
return FAILED; | |||
} | |||
for (uint32_t i = 0; i < STREAM_SWITCH_INPUT_NUM; ++i) { | |||
string input_tensor_name = op_desc->GetInputNameByIndex(i); | |||
int64_t fixed_addr_offset = davinci_model->GetFixedAddrsSize(input_tensor_name); | |||
fixed_addr_offset_.emplace_back(fixed_addr_offset); | |||
auto tensor_desc = op_desc->GetInputDesc(i); | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size); | |||
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr[%u] offset %ld", tensor_size, i, | |||
fixed_addr_offset); | |||
} | |||
return SUCCESS; | |||
} | |||
void StreamSwitchTaskInfo::SetInputAndValuePtr(DavinciModel *davinci_model, const vector<void *> &input_data_addrs) { | |||
if (davinci_model->IsKnownNode() && fixed_addr_offset_.size() == STREAM_SWITCH_INPUT_NUM) { | |||
input_ptr_ = davinci_model->GetCurrentFixedAddr(fixed_addr_offset_[0]); | |||
value_ptr_ = davinci_model->GetCurrentFixedAddr(fixed_addr_offset_[1]); | |||
} else { | |||
if (!input_data_addrs.empty() && input_data_addrs.size() >= STREAM_SWITCH_INPUT_NUM) { | |||
input_ptr_ = input_data_addrs[0]; | |||
value_ptr_ = input_data_addrs[1]; | |||
} | |||
} | |||
} | |||
REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH, StreamSwitchTaskInfo); | |||
} // namespace ge |
@@ -39,18 +39,13 @@ class StreamSwitchTaskInfo : public TaskInfo { | |||
Status Distribute() override; | |||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
private: | |||
void SetInputAndValuePtr(DavinciModel *davinci_model, const vector<void *> &input_data_addrs); | |||
void *input_ptr_; | |||
rtCondition_t cond_; | |||
void *value_ptr_; | |||
rtStream_t true_stream_; | |||
uint32_t true_stream_id_; | |||
rtSwitchDataType_t data_type_; | |||
static const uint32_t kInputNum = 2; | |||
vector<int64_t> fixed_addr_offset_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ |
@@ -24,15 +24,18 @@ | |||
namespace { | |||
const uint32_t kDynamicBtachParamNum = 1; | |||
const uint32_t kDynamicResolutionParamNum = 2; | |||
const uint8_t kStreamSwitchnInputNum = 1; | |||
} // namespace | |||
namespace ge { | |||
Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GELOGI("StreamSwitchNTaskInfo Init Start."); | |||
GE_CHECK_NOTNULL(davinci_model); | |||
if (davinci_model == nullptr) { | |||
GELOGE(PARAM_INVALID, "davinci_model is null!"); | |||
return PARAM_INVALID; | |||
} | |||
if (SetStream(task_def.stream_id(), davinci_model->GetStreamList()) != SUCCESS) { | |||
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); | |||
if (ret != SUCCESS) { | |||
return FAILED; | |||
} | |||
@@ -72,16 +75,14 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * | |||
GELOGE(FAILED, "Get true stream ptr of switchN op failed."); | |||
return FAILED; | |||
} | |||
if (davinci_model->IsKnownNode()) { | |||
input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_); | |||
} else { | |||
auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); | |||
if (input_data_addr.empty()) { | |||
GELOGE(FAILED, "Input data addr is nullptr."); | |||
return FAILED; | |||
} | |||
input_ptr_ = input_data_addr[0]; | |||
// set input_ptr_ | |||
auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); | |||
if (input_data_addr.empty()) { | |||
GELOGE(FAILED, "Input data addr is nullptr."); | |||
return FAILED; | |||
} | |||
input_ptr_ = input_data_addr[0]; | |||
davinci_model->DisableZeroCopy(input_ptr_); | |||
GELOGI("StreamSwitchNTaskInfo Init Success, inputSize:%u, elementSize:%d, trueStreamID:%ld.", input_size_, | |||
element_size_, op_desc->GetStreamId()); | |||
@@ -139,26 +140,5 @@ Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, Davinci | |||
return SUCCESS; | |||
} | |||
Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GE_CHECK_NOTNULL(davinci_model); | |||
auto stream_switchn_def = task_def.stream_switch_n(); | |||
uint32_t op_index = stream_switchn_def.op_index(); | |||
GELOGI("Begin to calculate args, op_index is: %u", op_index); | |||
auto op_desc = davinci_model->GetOpByIndex(op_index); | |||
GE_CHECK_NOTNULL(op_desc); | |||
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (op_desc->GetInputsSize() != kStreamSwitchnInputNum) { | |||
GELOGE(FAILED, "Stream switchn op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); | |||
return FAILED; | |||
} | |||
string input_tensor_name = op_desc->GetInputNameByIndex(0); | |||
args_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name); | |||
auto tensor_desc = op_desc->GetInputDesc(0); | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size); | |||
GELOGI("Calculate stream switchn task args , tensor_size %ld, args_offset %ld", tensor_size, args_offset_); | |||
return SUCCESS; | |||
} | |||
REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo); | |||
} // namespace ge |
@@ -29,8 +29,7 @@ class StreamSwitchNTaskInfo : public TaskInfo { | |||
value_ptr_(nullptr), | |||
true_stream_ptr_(nullptr), | |||
element_size_(0), | |||
data_type_(RT_SWITCH_INT64), | |||
args_offset_(0) {} | |||
data_type_(RT_SWITCH_INT64) {} | |||
~StreamSwitchNTaskInfo() override {} | |||
@@ -38,8 +37,6 @@ class StreamSwitchNTaskInfo : public TaskInfo { | |||
Status Distribute() override; | |||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
private: | |||
Status GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model); | |||
void *input_ptr_; | |||
@@ -50,7 +47,6 @@ class StreamSwitchNTaskInfo : public TaskInfo { | |||
rtSwitchDataType_t data_type_; | |||
vector<rtStream_t> true_stream_list_; | |||
vector<int64_t> value_list_; | |||
int64_t args_offset_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ |
@@ -34,13 +34,22 @@ class SuperKernel { | |||
public: | |||
SuperKernel(const void *stub, void *ptr, uint64_t sz, uint32_t dim) | |||
: func_stub_(stub), dev_nav_table_(ptr), nav_table_size_(sz), block_dim_(dim) {} | |||
~SuperKernel() = default; | |||
~SuperKernel() { | |||
// free memory when all releasing | |||
if (device_args_addr_ != nullptr) { | |||
GE_CHK_RT(rtFree(device_args_addr_)); | |||
GELOGI("SKT: super_kernel args addr free."); | |||
} | |||
if (dev_nav_table_ != nullptr) { | |||
GE_CHK_RT(rtFree(dev_nav_table_)); | |||
GELOGI("SKT: super_kernel args addr free."); | |||
} | |||
} | |||
Status Launch(rtStream_t stream, uint32_t dump_flag); | |||
const void *GetFuncStub() const { return func_stub_; } | |||
const void *GetNavTablePtr() const { return dev_nav_table_; } | |||
uint64_t GetNavTableSize() const { return nav_table_size_; } | |||
uint32_t GetBlockDim() const { return block_dim_; } | |||
void *GetNavTablePtr() const { return dev_nav_table_; } | |||
void *GetDeviceArgsPtr() const { return device_args_addr_; } | |||
}; | |||
} // namespace skt | |||
} // namespace ge | |||
@@ -42,10 +42,21 @@ Status SuperKernelFactory::Init() { | |||
rt_ret = rtGetAddrByFun(this->func_stub_, &this->func_ptr_); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret); | |||
return FAILED;) | |||
GELOGD( | |||
"SKT: fuseKernels super_kernel_template subFunc %p, device func " | |||
"address %p", | |||
this->func_stub_, this->func_ptr_); | |||
if (this->use_physical_address_ != nullptr) { | |||
void *skt_func = nullptr; | |||
rt_ret = rtKernelConfigTransArg(this->func_ptr_, sizeof(uint64_t), 0, &skt_func); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret); | |||
return FAILED;) | |||
GELOGD( | |||
"SKT: fuseKernels super_kernel_template subFunc %p, device func " | |||
"address %p, device physic PC %p", | |||
this->func_stub_, this->func_ptr_, skt_func); | |||
} else { | |||
GELOGD( | |||
"SKT: fuseKernels super_kernel_template subFunc %p, device func " | |||
"address %p", | |||
this->func_stub_, this->func_ptr_); | |||
} | |||
} | |||
is_init_ = true; | |||
@@ -60,8 +71,7 @@ Status SuperKernelFactory::Uninitialize() { | |||
} | |||
Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list, | |||
const std::vector<void *> &args_addr_list, uint32_t block_dim, | |||
std::unique_ptr<skt::SuperKernel> &h) { | |||
const std::vector<void *> &args_addr_list, uint32_t block_dim, SuperKernel *&h) { | |||
// Iterate through the ops to be fused | |||
// Each subkernel to be fused contains 2 fields: fn address offset, args | |||
// address. | |||
@@ -91,28 +101,70 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||
rtError_t rt_ret; | |||
void *hbm_nav_table_addr = nullptr; | |||
for (unsigned i = 0; i < stub_func_list.size(); i++) { | |||
void *sub_device_func = nullptr; | |||
rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret); | |||
return FAILED;) | |||
GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); | |||
// store two uint64_t address | |||
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating | |||
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4; | |||
GELOGD("SKT: CALL offet %lu", nav_table[i * 2]); | |||
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | |||
GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | |||
if (this->use_physical_address_ != nullptr) { | |||
for (unsigned i = 0; i < stub_func_list.size(); i++) { | |||
void *sub_device_func = nullptr; | |||
rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret); | |||
return FAILED;) | |||
void *sub_device_func_pys = nullptr; | |||
void *args_addr_pys = nullptr; | |||
rt_ret = rtKernelConfigTransArg(sub_device_func, sizeof(uint64_t), 0, &sub_device_func_pys); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret); | |||
return FAILED;) | |||
rt_ret = rtKernelConfigTransArg(args_addr_list[i], sizeof(uint64_t), 0, &args_addr_pys); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret); | |||
return FAILED;) | |||
GELOGD( | |||
"SKT: fuseKernels subFunc %p, device func address %p, device " | |||
"physic func address %p", | |||
stub_func_list[i], sub_device_func, sub_device_func_pys); | |||
// store two uint64_t address | |||
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating | |||
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func_pys)) / 4; | |||
GELOGD("SKT: CALL offset %lu", nav_table[i * 2]); | |||
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_pys)); | |||
GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | |||
} | |||
void *hbm_nav_table_addr_pys = nullptr; | |||
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) | |||
rt_ret = | |||
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) | |||
rt_ret = rtKernelConfigTransArg(hbm_nav_table_addr, sizeof(uint64_t), 0, &hbm_nav_table_addr_pys); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) | |||
GELOGD("SKT: hbm_nav_table_addr %p, hbm_nav_table_addr_pys %p", hbm_nav_table_addr, hbm_nav_table_addr_pys); | |||
// Create the necessary metadata for the super kernel | |||
h = new SuperKernel(this->func_stub_, hbm_nav_table_addr_pys, nav_table_size, block_dim); | |||
} else { | |||
for (unsigned i = 0; i < stub_func_list.size(); i++) { | |||
void *sub_device_func = nullptr; | |||
rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret); | |||
return FAILED;) | |||
GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); | |||
// store two uint64_t address | |||
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating | |||
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4; | |||
GELOGD("SKT: CALL offet %lu", nav_table[i * 2]); | |||
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | |||
GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | |||
} | |||
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) | |||
rt_ret = | |||
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) | |||
// Create the necessary metadata for the super kernel | |||
h = new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim); | |||
} | |||
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) | |||
rt_ret = | |||
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) | |||
// Create the necessary metadata for the super kernel | |||
h = | |||
std::unique_ptr<skt::SuperKernel>(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); | |||
return SUCCESS; | |||
} | |||
} // namespace skt | |||
@@ -29,6 +29,7 @@ class SuperKernelFactory { | |||
void *func_ptr_ = nullptr; | |||
void *handle_ = nullptr; | |||
std::string sk_stub_name_ = "_Z21super_kernel_templatePmm"; | |||
const char *use_physical_address_ = getenv("GE_USE_PHYSICAL_ADDRESS"); | |||
bool is_init_ = false; | |||
SuperKernelFactory(){}; | |||
~SuperKernelFactory() { | |||
@@ -47,7 +48,7 @@ class SuperKernelFactory { | |||
Status Init(); | |||
Status Uninitialize(); | |||
Status FuseKernels(const std::vector<void *> &stub_func_list, const std::vector<void *> &args_addr_list, | |||
uint32_t block_dim, std::unique_ptr<skt::SuperKernel> &h); | |||
uint32_t block_dim, SuperKernel *&h); | |||
}; | |||
} // namespace skt | |||
} // namespace ge | |||
@@ -72,8 +72,6 @@ class TaskInfo { | |||
virtual uint32_t GetTaskID() { return 0xFFFFFFFF; } | |||
virtual bool CallSaveDumpInfo() { return false; } | |||
virtual uint32_t GetStreamId() { return 0xFFFFFFFF; } | |||
virtual uintptr_t GetDumpArgs() { return 0; } | |||
@@ -86,5 +86,5 @@ class TaskInfoFactory { | |||
return ptr; \ | |||
} \ | |||
TaskInfoFactory::Registerar g_##type##_Task_Info_Creator(type, Creator_##type##_Task_Info); | |||
} // namespace ge | |||
}; // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_FACTORY_H_ |
@@ -129,6 +129,12 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, const DataBuffer &data, | |||
} | |||
auto dst_addr = static_cast<uint8_t *>(data.data); | |||
auto dst_size = static_cast<uint64_t>(data.length); | |||
if (ModelUtils::ConvertVirtualAddressToPhysical(dst_addr, dst_size, dst_addr) != SUCCESS) { | |||
GELOGE(FAILED, "[ZCPY] Convert virtual address to physical for dst_addr failed."); | |||
return FAILED; | |||
} | |||
GELOGI("[ZCPY] %s update task, args: %p, size: %zu, offset: %zu, addr: 0x%lx, length: %u", name_.c_str(), | |||
args_addr_, args_size_, offset, addr, data.length); | |||
*(uintptr_t *)(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr); | |||
@@ -0,0 +1,175 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "graph/load/output/output.h" | |||
#include <memory.h> | |||
#include "common/properties_manager.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
#include "graph/manager/graph_var_manager.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
namespace ge { | |||
Output::Output(const OpDescPtr &op_desc, DavinciModel *model) | |||
: base_(nullptr), | |||
var_base_(nullptr), | |||
logic_base_(0), | |||
logic_var_base_(0), | |||
model_(model), | |||
op_desc_(op_desc), | |||
input_num_(0) {} | |||
Output::~Output() { | |||
var_base_ = nullptr; | |||
base_ = nullptr; | |||
model_ = nullptr; | |||
} | |||
/// | |||
/// @ingroup domi | |||
/// @brief Initialize input/output params | |||
/// @return Status | |||
/// | |||
Status Output::Init() { | |||
if (op_desc_ == nullptr || model_ == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "The op_desc_ or model_ is nullptr."); | |||
return INTERNAL_ERROR; | |||
} | |||
base_ = model_->MemBase(); | |||
var_base_ = model_->VarMemBase(); | |||
logic_base_ = model_->GetRtBaseAddr(); | |||
logic_var_base_ = model_->GetRtVarAddr(); | |||
input_num_ = op_desc_->GetInputsSize(); | |||
v_input_size_.clear(); | |||
v_input_data_addr_.clear(); | |||
auto input_vector = op_desc_->GetInputOffset(); | |||
if (input_num_ != input_vector.size()) { | |||
GELOGE(INTERNAL_ERROR, "input desc size: %zu != input offset size: %zu.", input_num_, input_vector.size()); | |||
return INTERNAL_ERROR; | |||
} | |||
for (size_t i = 0; i < input_num_; i++) { | |||
int64_t tensor_size = 0; | |||
auto input_desc = op_desc_->GetInputDescPtr(i); | |||
GE_CHECK_NOTNULL(input_desc); | |||
Status ret = TensorUtils::GetSize(*input_desc, tensor_size); | |||
if (ret != GRAPH_SUCCESS) { | |||
GELOGE(ret, "Get size from TensorDesc failed, op : %s, input index : %zu", op_desc_->GetName().c_str(), i); | |||
return ret; | |||
} | |||
v_input_size_.push_back(tensor_size); | |||
if (VarManager::Instance(model_->SessionId())->IsVarAddr(input_vector[i])) { | |||
v_input_data_addr_.push_back(static_cast<uint8_t *>(var_base_ + input_vector[i] - logic_var_base_)); | |||
} else { | |||
v_input_data_addr_.push_back(static_cast<uint8_t *>(base_ + input_vector[i])); | |||
} | |||
} | |||
GELOGI("Init output:%lu, %lu, %lu", input_num_, v_input_size_.size(), v_input_data_addr_.size()); | |||
return SUCCESS; | |||
} | |||
/// | |||
/// @ingroup domi | |||
/// @brief Copy Op Output to user space. | |||
/// @brief when model running, Add one DataOp as input node, Add one Output Op as output node. | |||
/// @return Status | |||
/// | |||
Status Output::CopyResult(OutputData &rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share) { | |||
uint32_t data_count = 0; | |||
if (input_num_ > rslt.blobs.size() - data_begin) { | |||
GELOGE(FAILED, "Tensor num %zu, data_buf num: %zu.", input_num_, rslt.blobs.size() - data_begin); | |||
return FAILED; | |||
} else if (input_num_ < rslt.blobs.size() - data_begin) { | |||
GELOGW("Tensor num %zu, data_buf num: %zu.", input_num_, rslt.blobs.size() - data_begin); | |||
} | |||
for (size_t i = 0; i < input_num_; i++) { | |||
DataBuffer data_buf = rslt.blobs[data_begin + data_count]; | |||
Status ret = SetDataBuf(data_buf, data_count, i, support_mem_share); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Copy data to host error. index: %zu", i); | |||
return ret; | |||
} | |||
data_index = data_begin + data_count; | |||
} | |||
return SUCCESS; | |||
} | |||
Status Output::SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, bool support_mem_share) { | |||
if (data_buf.length == 0) { | |||
++data_count; | |||
GELOGD("Length of data_buffer is zero, No need to copy. output op : %s, output tensor index : %zu!", | |||
op_desc_->GetName().c_str(), i); | |||
return SUCCESS; | |||
} | |||
auto tensor_desc = op_desc_->GetInputDescPtr(static_cast<uint32_t>(i)); | |||
if (tensor_desc == nullptr) { | |||
GELOGE(FAILED, "tensor_desc is null"); | |||
return FAILED; | |||
} | |||
if (data_buf.isDataSupportMemShare && support_mem_share) { | |||
GELOGI("No need to copy input data, user's output data buffer can be shared."); | |||
} else { | |||
// Copy result to Databuf | |||
int64_t size = v_input_size_[i]; | |||
GELOGI("Tensor data size before: %ld", size); | |||
graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(*tensor_desc, size); | |||
if (graph_status != ge::GRAPH_SUCCESS) { | |||
GELOGE(graph_status, "GetTensorSizeInBytes failed!"); | |||
return FAILED; | |||
} | |||
if (data_buf.length < size) { | |||
GELOGE(FAILED, "Tensor data size: %ld data_buf length: %ld", size, data_buf.length); | |||
return FAILED; | |||
} else if (data_buf.length > size) { | |||
GELOGW("Tensor data size: %ld data_buf length: %ld", size, data_buf.length); | |||
} | |||
rtError_t rt_ret = rtMemcpy(data_buf.data, size, v_input_data_addr_[i], size, RT_MEMCPY_DEVICE_TO_HOST); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "rtmemcpy error"); | |||
return FAILED; | |||
} | |||
GELOGI("Tensor data size: %ld data_buf length: %ld", size, data_buf.length); | |||
} | |||
++data_count; | |||
GELOGD("Successfully copy the output tensor memory to buffer, output op : %s, output tensor index : %zu!", | |||
op_desc_->GetName().c_str(), i); | |||
return SUCCESS; | |||
} | |||
void Output::GetOutputData(vector<void *> &v_data_addr, vector<int64_t> &v_data_size) { | |||
for (size_t i = 0; i < input_num_; ++i) { | |||
v_data_addr.push_back(v_input_data_addr_[i]); | |||
v_data_size.push_back(v_input_size_[i]); | |||
} | |||
} | |||
} // namespace ge |
@@ -0,0 +1,94 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_GRAPH_LOAD_OUTPUT_OUTPUT_H_ | |||
#define GE_GRAPH_LOAD_OUTPUT_OUTPUT_H_ | |||
#include <string> | |||
#include <vector> | |||
#include "common/debug/log.h" | |||
#include "common/op/attr_value_util.h" | |||
#include "common/op/ge_op_utils.h" | |||
#include "common/types.h" | |||
#include "common/util.h" | |||
#include "common/ge_types.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
#include "graph/op_desc.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
namespace ge { | |||
using std::string; | |||
using std::vector; | |||
// The base class for all op | |||
class Output { | |||
public: | |||
Output(const OpDescPtr &op_desc, DavinciModel *model); | |||
virtual ~Output(); | |||
/// | |||
/// @ingroup domi | |||
/// @brief Initialize input/output params | |||
/// @return Status | |||
/// | |||
virtual Status Init(); | |||
/// | |||
/// @ingroup domi | |||
/// @brief Copy Op Output to user space. | |||
/// @brief when model running, Add one DataOp as input node, Add one Output Op as output node. | |||
/// @return Status | |||
/// | |||
virtual Status CopyResult(OutputData &rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share); | |||
/// | |||
/// @ingroup domi | |||
/// @brief Trans Output data to fp16 | |||
/// @return Status | |||
/// | |||
Status SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, bool support_mem_share); | |||
/// | |||
/// @ingroup domi | |||
/// @brief Get Output data and size. | |||
/// @return void | |||
/// | |||
void GetOutputData(vector<void *> &v_data_addr, vector<int64_t> &v_data_size); | |||
// Copy assignment operator and copy constructor are deleted | |||
Output &operator=(const Output &output) = delete; | |||
Output(const Output &output) = delete; | |||
protected: | |||
// Model's base address | |||
uint8_t *base_; | |||
uint8_t *var_base_; | |||
uint64_t logic_base_; | |||
uint64_t logic_var_base_; | |||
// The DavinciModel which ops belong to | |||
DavinciModel *model_; | |||
ConstOpDescPtr op_desc_; | |||
// Input descriptions | |||
size_t input_num_; | |||
vector<void *> v_input_data_addr_; // init as:buf_base + op_def_->input(i)); | |||
vector<int64_t> v_input_size_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_OUTPUT_OUTPUT_H_ |
@@ -34,6 +34,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, | |||
26 * kGByteSize}; | |||
static bool BlockComparator(const Block *left, const Block *right) { | |||
if (left->device_id != right->device_id) { | |||
return left->device_id < right->device_id; | |||
} | |||
if (left->size != right->size) { | |||
return left->size < right->size; | |||
} | |||
@@ -264,20 +267,20 @@ Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { | |||
return ge::FAILED; | |||
} | |||
} | |||
if (AddToBlockBin(memory_addr, memory_size, device_id) != ge::SUCCESS) { | |||
if (AddToBlockBin(memory_addr, memory_size) != ge::SUCCESS) { | |||
(void)memory_allocator_->FreeMemory(memory_addr); | |||
return ge::FAILED; | |||
} | |||
return ge::SUCCESS; | |||
} | |||
Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size, uint32_t device_id) { | |||
Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size) { | |||
BlockBin *bin = GetBlockBin(size); | |||
if (bin == nullptr) { | |||
GELOGE(ge::FAILED, "Get block bin failed size = %zu", size); | |||
return ge::FAILED; | |||
} | |||
Block *block = new (std::nothrow) Block(device_id, size, bin, nullptr); | |||
Block *block = new (std::nothrow) Block(0, size, bin, nullptr); | |||
if (block == nullptr) { | |||
GELOGE(ge::FAILED, "Alloc block failed size = %zu", size); | |||
return ge::FAILED; | |||
@@ -336,4 +339,5 @@ void CachingAllocator::FreeBlockBins() { | |||
} | |||
} | |||
} | |||
} // namespace ge |
@@ -32,6 +32,7 @@ | |||
#include "runtime/mem.h" | |||
namespace ge { | |||
constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes | |||
constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold | |||
constexpr size_t kKByteSize = 1024; | |||
@@ -68,10 +69,6 @@ class CachingAllocator { | |||
public: | |||
explicit CachingAllocator(rtMemType_t memory_type); | |||
CachingAllocator(const CachingAllocator &) = delete; | |||
CachingAllocator &operator=(const CachingAllocator &) = delete; | |||
virtual ~CachingAllocator() = default; | |||
/// | |||
@@ -140,10 +137,9 @@ class CachingAllocator { | |||
/// @brief add memory to right bin based on size | |||
/// @param [in] memory ptr | |||
/// @param [in] memory size | |||
/// @param [in] device_id device id | |||
/// @return Status result of function | |||
/// | |||
Status AddToBlockBin(uint8_t *ptr, size_t size, uint32_t device_id); | |||
Status AddToBlockBin(uint8_t *ptr, size_t size); | |||
/// | |||
/// @ingroup ge_graph | |||
@@ -210,5 +206,7 @@ class CachingAllocator { | |||
// block bins by different block size | |||
BlockBin *free_block_bins_[kNumBins]; | |||
}; | |||
} // namespace ge | |||
}; // namespace ge | |||
#endif // GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_ |
@@ -57,6 +57,7 @@ | |||
#include "graph/passes/flow_ctrl_pass.h" | |||
#include "graph/passes/hccl_group_pass.h" | |||
#include "graph/passes/hccl_memcpy_pass.h" | |||
#include "graph/passes/identify_reference_pass.h" | |||
#include "graph/passes/identity_pass.h" | |||
#include "graph/passes/iterator_op_pass.h" | |||
#include "graph/passes/link_gen_mask_nodes_pass.h" | |||
@@ -73,9 +74,7 @@ | |||
#include "graph/passes/switch_data_edges_bypass.h" | |||
#include "graph/passes/switch_dead_branch_elimination.h" | |||
#include "graph/passes/switch_logic_remove_pass.h" | |||
#include "graph/passes/merge_to_stream_merge_pass.h" | |||
#include "graph/passes/switch_to_stream_switch_pass.h" | |||
#include "graph/passes/attach_stream_label_pass.h" | |||
#include "graph/passes/switch_op_pass.h" | |||
#include "graph/passes/transop_breadth_fusion_pass.h" | |||
#include "graph/passes/transop_depth_fusion_pass.h" | |||
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h" | |||
@@ -84,7 +83,6 @@ | |||
#include "graph/passes/transpose_transdata_pass.h" | |||
#include "graph/passes/variable_op_pass.h" | |||
#include "graph/passes/variable_prepare_op_pass.h" | |||
#include "graph/passes/ref_identity_delete_op_pass.h" | |||
#include "graph/passes/variable_ref_delete_op_pass.h" | |||
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | |||
#include "graph/utils/tensor_adapter.h" | |||
@@ -349,13 +347,12 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr | |||
return SUCCESS; | |||
} | |||
#define GM_RUN_AND_DUMP_PERF(name, func, ...) \ | |||
#define GM_RUN_AND_DUMP(name, func, ...) \ | |||
do { \ | |||
GE_RUN_PERF(GraphManager, func, __VA_ARGS__); \ | |||
GE_RUN(GraphManager, func, __VA_ARGS__); \ | |||
GE_DUMP(compute_graph, "PreRunAfter" name); \ | |||
GELOGI("Run %s on graph %s(%u) success.", name, compute_graph->GetName().c_str(), graph_node->GetGraphId()); \ | |||
} while (0) | |||
Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | |||
GeRootModelPtr &ge_root_model, uint64_t session_id) { | |||
GE_CHECK_NOTNULL(graph_node); | |||
@@ -368,30 +365,30 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
compute_graph->GetName().c_str()); | |||
GE_DUMP(compute_graph, "PreRunBegin"); | |||
GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", graph_optimize_.OptimizeOriginalGraphForQuantize, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("HandleSummaryOp", graph_optimize_.HandleSummaryOp, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, | |||
session_id); | |||
GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph); | |||
GM_RUN_AND_DUMP("OptimizeGraphPrepare", graph_optimize_.OptimizeOriginalGraphForQuantize, compute_graph); | |||
GM_RUN_AND_DUMP("HandleSummaryOp", graph_optimize_.HandleSummaryOp, compute_graph); | |||
GM_RUN_AND_DUMP("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, | |||
session_id); | |||
GM_RUN_AND_DUMP("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); | |||
GM_RUN_AND_DUMP_PERF("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); | |||
GM_RUN_AND_DUMP("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); | |||
GM_RUN_AND_DUMP("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); | |||
GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); | |||
if (IsTailingOptimization()) { | |||
GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); | |||
GM_RUN_AND_DUMP("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); | |||
} | |||
GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); | |||
GM_RUN_AND_DUMP("Optimize1", OptimizeStage1, compute_graph); | |||
GM_RUN_AND_DUMP("InferShape2", compute_graph->InferShapeInNeed); | |||
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||
if (unknown_shape_skip != nullptr) { | |||
PassManager graph_pass; | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) | |||
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | |||
} | |||
GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); | |||
GM_RUN_AND_DUMP_PERF("OptimizeSubgraph", OptimizeSubgraph, graph_node, compute_graph, session_id); | |||
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | |||
GM_RUN_AND_DUMP("OptimizeSubgraph", OptimizeSubgraph, graph_node, compute_graph, session_id); | |||
GM_RUN_AND_DUMP("Optimize2", OptimizeStage2, compute_graph); | |||
GM_RUN_AND_DUMP("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | |||
// when set incre build, save om model and var manager | |||
GeModelPtr ge_model = nullptr; | |||
@@ -400,7 +397,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
GELOGW("Fail to save cache."); | |||
} | |||
// release rts generate context | |||
RtContextUtil::GetInstance().DestroyRtContexts(session_id); | |||
RtContextUtil::GetInstance().DestroyrtContexts(); | |||
GEEVENT("[GEPERFTRACE] GE PreRun End"); | |||
return SUCCESS; | |||
} | |||
@@ -474,7 +471,7 @@ Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphN | |||
} | |||
GE_TIMESTAMP_START(LoadGraph); | |||
Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, model_listener); | |||
GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraph"); | |||
GE_TIMESTAMP_END(LoadGraph, "GraphManager::LoadGraph"); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[StartForRunGraph] LoadGraph Failed"); | |||
graph_node->SetRunFlag(false); | |||
@@ -637,7 +634,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector<GeTenso | |||
graph_optimize_.TranFrameOp(compute_graph_tmp); | |||
} | |||
GeRootModelPtr ge_root_model = nullptr; | |||
GeRootModelPtr ge_root_model; | |||
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[RunGraph] StartForRunGraph failed!"); | |||
@@ -1616,6 +1613,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
SwitchDeadBranchElimination switch_dead_branch_elimination; | |||
SwitchLogicRemovePass switch_logic_remove_pass; | |||
MergePass merge_pass; | |||
IdentifyReferencePass identify_reference_pass; | |||
CastRemovePass cast_remove_pass; | |||
TransposeTransDataPass transpose_transdata_pass; | |||
TransOpSymmetryEliminationPass symmetry_elimination_pass; | |||
@@ -1624,6 +1622,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination); | |||
names_to_passes.emplace_back("SwitchLogicRemovePass", &switch_logic_remove_pass); | |||
names_to_passes.emplace_back("MergePass", &merge_pass); | |||
names_to_passes.emplace_back("IdentifyReferencePass", &identify_reference_pass); | |||
names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); | |||
names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | |||
names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | |||
@@ -1639,32 +1638,14 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
GELOGE(ret, "Run passes when OptimizeStage1_2 failed, ret:%u.", ret); | |||
return ret; | |||
} | |||
// Calculate Op/Fe constantfolding cost | |||
uint64_t op_constant_folding_cost = 0; | |||
for (auto &it : constant_folding_pass.GetOpConstantFoldingPerfStatistic()) { | |||
op_constant_folding_cost += it.second.second; | |||
GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(), | |||
it.second.second, it.second.first); | |||
} | |||
GEEVENT("[GEPERFTRACE] The time cost of extern constant folding is [%lu] micro second.", op_constant_folding_cost); | |||
for (auto &it : constant_folding_pass.GetGeConstantFoldingPerfStatistic()) { | |||
op_constant_folding_cost += it.second.second; | |||
GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(), | |||
it.second.second, it.second.first); | |||
} | |||
GraphUtils::DumpGEGraphToOnnx(*compute_graph, "OptimizeStage1_2"); | |||
PassManager graph_pass; | |||
// the prune pass should between SwitchPass and SwitchToStreamSwitchPass | |||
// the prune pass should between SwtichPass and SwitchOpPass | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass)) | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::NextIterationPass", new (std::nothrow) NextIterationPass)) | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ControlTriggerPass", new (std::nothrow) ControlTriggerPass)) | |||
GE_CHK_STATUS_RET( | |||
graph_pass.AddPass("OptimizeStage1_3::MergeToStreamMergePass", new (std::nothrow) MergeToStreamMergePass)) | |||
GE_CHK_STATUS_RET( | |||
graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) | |||
GE_CHK_STATUS_RET( | |||
graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::SwitchOpPass", new (std::nothrow) SwitchOpPass)) | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass)) | |||
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", | |||
new (std::nothrow) VariableRefUselessControlOutDeletePass)) | |||
@@ -1679,7 +1660,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
NamesToPass identity_remove_pass; | |||
GE_TIMESTAMP_START(identity_remove_pass); | |||
IdentityPass identity_force_pass(true); // after SwitchToStreamSwitchPass | |||
IdentityPass identity_force_pass(true); // after SwitchOpPass | |||
identity_remove_pass.emplace_back("IdentityPass", &identity_force_pass); | |||
ret = GEPass(compute_graph).Run(identity_remove_pass); | |||
GE_TIMESTAMP_END(identity_remove_pass, "GraphPrepare::IdentityRemovePass"); | |||
@@ -1739,8 +1720,6 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", | |||
new (std::nothrow) MultiBatchPass)) | |||
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::RefIdentityDeleteOpPass", | |||
new (std::nothrow) RefIdentityDeleteOpPass)) | |||
// the value of the attr is the original variable name the ref-variable ref from. | |||
// The attr will be used when allocating memory, | |||
// the node marked attr will be output to a variable instead of new-allocated memory. | |||
@@ -1798,6 +1777,8 @@ Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_gra | |||
GEPass ge_passes_for_shape(compute_graph); | |||
NamesToPass names_to_passes_for_shape; | |||
IdentifyReferencePass identify_reference_pass; | |||
names_to_passes_for_shape.emplace_back("IdentifyReferencePass", &identify_reference_pass); | |||
CastRemovePass cast_remove_pass; | |||
names_to_passes_for_shape.emplace_back("CastRemovePass", &cast_remove_pass); | |||
TransposeTransDataPass transpose_transdata_pass; | |||
@@ -1885,10 +1866,7 @@ Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_gra | |||
GE_CHK_STATUS_RET(ret, "Remove isolated Constant failed, ret:%d.", ret); | |||
PassManager pass_for_optimize; | |||
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); | |||
if (unknown_shape_skip == nullptr) { | |||
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("SubgraphPass", new (std::nothrow) SubgraphPass)); | |||
} | |||
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("SubgraphPass", new (std::nothrow) SubgraphPass)); | |||
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("MultiBatchPass", new (std::nothrow) MultiBatchPass)); | |||
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("CompileNodesPass", new (std::nothrow) CompileNodesPass)); | |||
GE_TIMESTAMP_START(pass_for_optimize); | |||
@@ -1928,7 +1906,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G | |||
GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_); | |||
Status ret = | |||
GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); | |||
GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync"); | |||
GE_TIMESTAMP_END(LoadGraph, "GraphManager::LoadGraphAsync"); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed"); | |||
graph_node->SetRunFlag(false); | |||
@@ -2331,21 +2309,21 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
GELOGE(FAILED, "failed get dynamic shape partitioned flag on partitioned graph."); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_EVENT_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape"); | |||
GE_TIMESTAMP_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape"); | |||
GE_TIMESTAMP_START(GraphPartition); | |||
ret = graph_partitioner_.Partition(compute_graph, GraphPartitioner::kPartitioning); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Graph partition Failed"); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(GraphPartition, "OptimizeSubgraph::Partition1"); | |||
GE_TIMESTAMP_END(GraphPartition, "OptimizeSubgraph::Partition1"); | |||
GE_TIMESTAMP_START(SetSubgraph); | |||
ret = SetSubgraph(session_id, compute_graph); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Graph set subgraph Failed"); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph"); | |||
GE_TIMESTAMP_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph"); | |||
ComputeGraphPtr merged_compute_graph = nullptr; | |||
std::vector<ComputeGraphPtr> merged_sub_graph_list; | |||
@@ -2364,7 +2342,7 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
sub_graph->SetSessionID(session_id); | |||
sub_graph->SetGraphID(graph_node->GetGraphId()); | |||
} | |||
GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph"); | |||
GE_TIMESTAMP_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph"); | |||
GE_DUMP(merged_compute_graph, "mergedComputeGraph"); | |||
compute_graph = merged_compute_graph; | |||
if (!AttrUtils::SetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
@@ -2390,7 +2368,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp | |||
} | |||
bool is_always_dump = false; | |||
if (!PropertiesManager::Instance().GetDumpProperties(session_id).GetDumpPath().empty()) { | |||
PropertiesManager &properties_manager = PropertiesManager::Instance(); | |||
if (!properties_manager.GetDumpOutputPath().empty()) { | |||
is_always_dump = true; | |||
} | |||
@@ -327,6 +327,6 @@ class GraphManager { | |||
std::mutex run_mutex_; | |||
}; | |||
} // namespace ge | |||
}; // namespace ge | |||
#endif // GE_GRAPH_MANAGER_GRAPH_MANAGER_H_ |
@@ -190,6 +190,6 @@ class MemManager { | |||
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | |||
std::recursive_mutex allocator_mutex_; | |||
}; | |||
} // namespace ge | |||
}; // namespace ge | |||
#endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ |
@@ -91,7 +91,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen | |||
std::string var_key = VarKey(var_name, tensor_desc); | |||
GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | |||
if (var_addr_mgr_map_.count(var_key) == 0) { | |||
uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + | |||
uint64_t logic_address = VarManager::Instance(0)->GetVarMemLogicBase() + | |||
reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | |||
TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | |||
@@ -274,7 +274,7 @@ MemResource::MemResource() : total_size_(0), var_mem_size_(0) {} | |||
Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) { | |||
size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; | |||
uint64_t real_size = size; | |||
total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize(); | |||
total_size_ = VarManager::Instance(0)->GetVarMemMaxSize(); | |||
if (total_size_ < var_mem_size_) { | |||
GELOGE(PARAM_INVALID, "total_size_: %lu is smaller than var_mem_size_: %lu", total_size_, var_mem_size_); | |||
return PARAM_INVALID; | |||
@@ -684,8 +684,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty | |||
if (mem_base == nullptr) { | |||
return nullptr; | |||
} | |||
uint8_t *mem_addr = | |||
logic_addr + reinterpret_cast<intptr_t>(mem_base) - VarManager::Instance(session_id_)->GetVarMemLogicBase(); | |||
uint8_t *mem_addr = logic_addr + reinterpret_cast<intptr_t>(mem_base) - VarManager::Instance(0)->GetVarMemLogicBase(); | |||
return mem_addr; | |||
} | |||
@@ -309,5 +309,5 @@ class VarManagerPool { | |||
std::mutex var_manager_mutex_; | |||
map<uint64_t, VarManager *> var_manager_map_; | |||
}; | |||
} // namespace ge | |||
}; // namespace ge | |||
#endif // GE_GRAPH_MANAGER_GRAPH_VAR_MANAGER_H_ |
@@ -92,6 +92,6 @@ class EventManager { | |||
std::vector<rtEvent_t> event_list_; | |||
bool inited_; | |||
uint32_t current_idx_; | |||
}; // EventManager | |||
} // namespace ge | |||
}; // EventManager | |||
}; // namespace ge | |||
#endif // GE_GRAPH_MANAGER_MODEL_MANAGER_EVENT_MANAGER_H_ |
@@ -397,11 +397,10 @@ Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeT | |||
uint8_t *src_addr = nullptr; | |||
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); | |||
uint8_t *mem_addr = | |||
src_addr - | |||
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>( | |||
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
uint8_t *mem_addr = src_addr - | |||
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(0)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>( | |||
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size)); | |||
GE_CHK_RT_RET(rtMemcpy(*host_addr, src_tensor_size, mem_addr, src_tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||
@@ -414,11 +413,10 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8 | |||
const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) { | |||
uint8_t *dst_addr = nullptr; | |||
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); | |||
uint8_t *mem_addr = | |||
dst_addr - | |||
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>( | |||
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
uint8_t *mem_addr = dst_addr - | |||
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(0)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>( | |||
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||
GELOGI("SyncTensorToDevice var_name %s, addr_size %u", var_name.c_str(), addr_size); | |||
@@ -24,6 +24,7 @@ | |||
#include "graph/utils/type_utils.h" | |||
namespace ge { | |||
Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc, | |||
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) { | |||
GE_CHECK_NOTNULL(op_desc); | |||
@@ -100,12 +101,6 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, hcclDataType | |||
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(i)); | |||
GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetInputDescPtr(i), input_size), | |||
"get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); | |||
// dynamic shape hccl op get size from output tensor desc | |||
if (op_desc->HasAttr(ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||
GE_CHECK_NOTNULL(op_desc->GetOutputDescPtr(i)); | |||
GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetOutputDescPtr(i), input_size), | |||
"get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); | |||
} | |||
GE_IF_BOOL_EXEC( | |||
op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0; | |||
@@ -119,8 +114,6 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, hcclDataType | |||
total_size = total_size + block_size; continue;); | |||
int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); | |||
GELOGD("hcom util node %s inputsize %ld, shapesize %ld, datasize %d.", op_desc->GetName().c_str(), input_size, | |||
shape_size, size); | |||
GE_CHK_STATUS_RET(ge::CheckInt64Int32MulOverflow(shape_size, size), | |||
"Product of shape size and size beyond INT64_MAX"); | |||
GE_IF_BOOL_EXEC(is_allgather, block_size = shape_size * size;); | |||
@@ -144,6 +144,8 @@ class HcomOmeUtil { | |||
/// | |||
static Status GetHorovodInputs(const ge::ConstOpDescPtr &op_desc, | |||
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos); | |||
private: | |||
/// | |||
/// @ingroup domi_ome | |||
/// @brief GetHcomCount | |||
@@ -152,8 +154,6 @@ class HcomOmeUtil { | |||
/// | |||
static Status GetHcomCount(const ge::ConstOpDescPtr &op_desc, hcclDataType_t data_type, bool is_allgather, | |||
int &count); | |||
private: | |||
/// | |||
/// @ingroup domi_ome | |||
/// @brief GetHorovodCount | |||
@@ -19,30 +19,13 @@ | |||
#include "framework/common/debug/ge_log.h" | |||
namespace ge { | |||
void RtContextUtil::AddRtContext(uint64_t session_id, rtContext_t context) { | |||
std::lock_guard<std::mutex> lock(ctx_mutex_); | |||
rt_contexts_[session_id].emplace_back(context); | |||
} | |||
void RtContextUtil::DestroyRtContexts(uint64_t session_id) { | |||
std::lock_guard<std::mutex> lock(ctx_mutex_); | |||
auto &contexts = rt_contexts_[session_id]; | |||
DestroyRtContexts(session_id, contexts); | |||
} | |||
void RtContextUtil::DestroyAllRtContexts() { | |||
std::lock_guard<std::mutex> lock(ctx_mutex_); | |||
for (auto &ctx_pair : rt_contexts_) { | |||
DestroyRtContexts(ctx_pair.first, ctx_pair.second); | |||
} | |||
rt_contexts_.clear(); | |||
} | |||
void RtContextUtil::AddrtContext(rtContext_t context) { rtContexts_.emplace_back(context); } | |||
void RtContextUtil::DestroyRtContexts(uint64_t session_id, std::vector<rtContext_t> &contexts) { | |||
GELOGI("Runtime context handle number of session %lu is %zu.", session_id, contexts.size()); | |||
for (auto &rtContext : contexts) { | |||
void RtContextUtil::DestroyrtContexts() { | |||
GELOGI("The size of runtime context handle is %zu.", rtContexts_.size()); | |||
for (auto &rtContext : rtContexts_) { | |||
(void)rtCtxDestroy(rtContext); | |||
} | |||
contexts.clear(); | |||
rtContexts_.clear(); | |||
} | |||
} // namespace ge |
@@ -18,8 +18,6 @@ | |||
#define GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_ | |||
#include <vector> | |||
#include <map> | |||
#include <mutex> | |||
#include "runtime/context.h" | |||
@@ -31,14 +29,13 @@ class RtContextUtil { | |||
return instance; | |||
} | |||
void AddRtContext(uint64_t session_id, rtContext_t context); | |||
void AddrtContext(rtContext_t context); | |||
const rtContext_t GetNormalModeContext() const { return before_prerun_ctx_; } | |||
void SetNormalModeContext(rtContext_t context) { before_prerun_ctx_ = context; } | |||
void DestroyRtContexts(uint64_t session_id); | |||
void DestroyAllRtContexts(); | |||
void DestroyrtContexts(); | |||
RtContextUtil &operator=(const RtContextUtil &) = delete; | |||
RtContextUtil(const RtContextUtil &RtContextUtil) = delete; | |||
@@ -47,12 +44,8 @@ class RtContextUtil { | |||
RtContextUtil() = default; | |||
~RtContextUtil() {} | |||
void DestroyRtContexts(uint64_t session_id, std::vector<rtContext_t> &contexts); | |||
std::map<uint64_t, std::vector<rtContext_t>> rt_contexts_; | |||
std::vector<rtContext_t> rtContexts_; | |||
rtContext_t before_prerun_ctx_ = nullptr; | |||
std::mutex ctx_mutex_; | |||
}; | |||
} // namespace ge | |||
@@ -299,36 +299,4 @@ void GraphOptimize::TranFrameOp(ComputeGraphPtr &compute_graph) { | |||
} | |||
} | |||
} | |||
Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { | |||
for (auto &node : compute_graph->GetAllNodes()) { | |||
GE_CHECK_NOTNULL(node); | |||
auto op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
auto input_name_index = op_desc->GetAllInputName(); | |||
bool is_ref = false; | |||
for (const auto &name_index : input_name_index) { | |||
const int out_index = op_desc->GetOutputIndexByName(name_index.first); | |||
if (out_index != -1) { | |||
auto input_desc = op_desc->GetInputDesc(name_index.second); | |||
input_desc.SetRefPortByIndex({name_index.second}); | |||
op_desc->UpdateInputDesc(name_index.second, input_desc); | |||
GELOGI("SetRefPort: set op[%s] input desc[%u-%s] ref.", op_desc->GetName().c_str(), name_index.second, | |||
name_index.first.c_str()); | |||
auto output_desc = op_desc->GetOutputDesc(static_cast<uint32_t>(out_index)); | |||
output_desc.SetRefPortByIndex({name_index.second}); | |||
op_desc->UpdateOutputDesc(static_cast<uint32_t>(out_index), output_desc); | |||
GELOGI("SetRefPort: set op[%s] output desc[%u-%s] ref.", op_desc->GetName().c_str(), out_index, | |||
name_index.first.c_str()); | |||
is_ref = true; | |||
} | |||
} | |||
if (is_ref) { | |||
AttrUtils::SetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); | |||
GELOGI("param [node] %s is reference node, set attribute %s to be true.", node->GetName().c_str(), | |||
ATTR_NAME_REFERENCE.c_str()); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -67,9 +67,6 @@ class GraphOptimize { | |||
// handle summary node before preRun graph | |||
Status HandleSummaryOp(ComputeGraphPtr &compute_graph); | |||
// Identify reference node before optimize subgraph | |||
Status IdentifyReference(ComputeGraphPtr &compute_graph); | |||
void TranFrameOp(ComputeGraphPtr &compute_graph); | |||
private: | |||
@@ -88,5 +85,5 @@ class GraphOptimize { | |||
std::map<uint32_t, std::map<string, size_t>> summary_output_indexes_ = {}; | |||
std::string func_bin_path_; | |||
}; | |||
} // namespace ge | |||
}; // namespace ge | |||
#endif // GE_GRAPH_OPTIMIZE_GRAPH_OPTIMIZE_H_ |
@@ -80,8 +80,7 @@ Status GraphOptimize::HandleSummaryOp(ComputeGraphPtr &compute_graph) { | |||
del_nodes.emplace_back(node_ptr); | |||
} | |||
} | |||
GE_IF_BOOL_EXEC(!summary_output_indexes.empty(), | |||
summary_output_indexes_.insert({compute_graph->GetGraphID(), summary_output_indexes})); | |||
summary_output_indexes_.insert({compute_graph->GetGraphID(), summary_output_indexes}); | |||
// add output nodes for summary | |||
std::vector<std::pair<NodePtr, int32_t>> out_nodes_info; | |||
@@ -62,16 +62,15 @@ Status DynamicShapePartitioner::Partition() { | |||
} | |||
GELOGD("Start dynamic shape partition graph %s.", root_graph_->GetName().c_str()); | |||
REQUIRE_SUCCESS(MarkUnknownShapeNodes(), "Failed mark unknown shape nodes, root grah name:%s.", | |||
root_graph_->GetName().c_str()); | |||
REQUIRE_SUCCESS(MarkUnknownShapeNodes(), "Failed mark unknown shape nodes."); | |||
if (unknown_shape_nodes_.empty()) { | |||
GELOGD("Skip dynamic shape partition of graph %s as all nodes are known shape.", root_graph_->GetName().c_str()); | |||
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), | |||
"Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str()); | |||
"Failed set dynamic shape partitioned flag on root graph."); | |||
return SUCCESS; | |||
} | |||
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true), | |||
"Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str()); | |||
"Failed set dynamic shape partitioned flag on root graph."); | |||
DumpGraph("_Before_DSP"); | |||
auto status = PartitionImpl(); | |||
@@ -108,21 +107,21 @@ void DynamicShapePartitioner::PruneUniqueClusters() { | |||
} | |||
Status DynamicShapePartitioner::BuildPartitionFrame() { | |||
for (const auto &cluster : unique_clusters_) { | |||
for (auto cluster : unique_clusters_) { | |||
REQUIRE_SUCCESS(cluster->BuildFrame(), "Failed build frame of cluster[%lu].", cluster->Id()); | |||
} | |||
return SUCCESS; | |||
} | |||
Status DynamicShapePartitioner::CombinePartitionFrame() { | |||
for (const auto &cluster : unique_clusters_) { | |||
for (auto cluster : unique_clusters_) { | |||
REQUIRE_SUCCESS(cluster->CombinePartitionFrame(), "Failed combine frame of cluster[%lu].", cluster->Id()); | |||
} | |||
return SUCCESS; | |||
} | |||
Status DynamicShapePartitioner::BuildPartitionSubgraph() { | |||
for (const auto &cluster : unique_clusters_) { | |||
for (auto cluster : unique_clusters_) { | |||
REQUIRE_SUCCESS(cluster->BuildPartitionSubgraph(), "Failed build subgraph of cluster[%lu].", cluster->Id()); | |||
} | |||
return SUCCESS; | |||
@@ -135,10 +134,10 @@ std::string DynamicShapePartitioner::DebugString() const { | |||
size_t netoutput = 0; | |||
std::stringstream ss; | |||
ss << "All unknown shape nodes:" << std::endl; | |||
for (const auto &node : unknown_shape_nodes_) { | |||
for (auto node : unknown_shape_nodes_) { | |||
ss << " [" << node->GetName() << "](" << node->GetType() << ")" << std::endl; | |||
} | |||
for (const auto &cluster : unique_clusters_) { | |||
for (auto cluster : unique_clusters_) { | |||
if (cluster->IsUnknownShape()) { | |||
unknown++; | |||
} else if (cluster->IsKnownShape()) { | |||
@@ -151,7 +150,7 @@ std::string DynamicShapePartitioner::DebugString() const { | |||
} | |||
ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known | |||
<< ", unknown:" << unknown << ", netoutput:" << netoutput << std::endl; | |||
for (const auto &cluster : unique_clusters_) { | |||
for (auto cluster : unique_clusters_) { | |||
ss << " " << cluster->DebugString() << std::endl; | |||
} | |||
return ss.str(); | |||
@@ -159,13 +158,13 @@ std::string DynamicShapePartitioner::DebugString() const { | |||
void DynamicShapePartitioner::DumpGraph(const std::string &suffix) { | |||
GraphUtils::DumpGEGraphToOnnx(*root_graph_, root_graph_->GetName() + suffix); | |||
for (const auto &sub_graph : root_graph_->GetAllSubgraphs()) { | |||
for (auto sub_graph : root_graph_->GetAllSubgraphs()) { | |||
GraphUtils::DumpGEGraphToOnnx(*sub_graph, sub_graph->GetName() + suffix); | |||
} | |||
} | |||
void DynamicShapePartitioner::ClearResource() { | |||
for (const auto &cluster : unique_clusters_) { | |||
for (auto cluster : unique_clusters_) { | |||
cluster->Clear(); | |||
} | |||
node_2_cluster_.clear(); | |||
@@ -176,7 +175,8 @@ void DynamicShapePartitioner::ClearResource() { | |||
} | |||
Status DynamicShapePartitioner::MarkUnknownShapeNodes() { | |||
for (auto &node : root_graph_->GetDirectNode()) { | |||
auto graph = root_graph_; | |||
for (auto &node : graph->GetDirectNode()) { | |||
REQUIRE_SUCCESS(CollectSpreadUnknownShapeNodes(node), "Failed collect spread unknown shape nodes %s.", | |||
node->GetName().c_str()); | |||
} | |||
@@ -186,7 +186,7 @@ Status DynamicShapePartitioner::MarkUnknownShapeNodes() { | |||
Status DynamicShapePartitioner::InitClusters() { | |||
auto graph = root_graph_; | |||
size_t rank = 0; | |||
for (const auto &node : graph->GetDirectNode()) { | |||
for (const auto node : graph->GetDirectNode()) { | |||
Cluster::Type type = Cluster::DATA; | |||
if (node->GetType() == DATA) { | |||
type = Cluster::DATA; | |||
@@ -208,7 +208,7 @@ Status DynamicShapePartitioner::InitClusters() { | |||
cluster->AddInput(node_2_cluster_[parent]); | |||
} | |||
} | |||
for (const auto &node : graph->GetDirectNode()) { | |||
for (const auto node : graph->GetDirectNode()) { | |||
GELOGD("Make cluster for node %s : %s.", node->GetName().c_str(), node_2_cluster_[node]->DebugString().c_str()); | |||
} | |||
return SUCCESS; | |||
@@ -220,8 +220,8 @@ Status DynamicShapePartitioner::TopologicalSortClusters() { | |||
std::queue<ClusterPtr> ready_clusters; | |||
std::unordered_map<ClusterPtr, size_t> cluster_pending_count; | |||
std::unordered_set<ClusterPtr> seen_clusters; | |||
for (auto &iter : node_2_cluster_) { | |||
auto cluster = iter.second; | |||
for (auto iter = node_2_cluster_.begin(); iter != node_2_cluster_.end(); iter++) { | |||
auto cluster = iter->second; | |||
if (seen_clusters.count(cluster) != 0) { | |||
continue; | |||
} | |||
@@ -242,7 +242,7 @@ Status DynamicShapePartitioner::TopologicalSortClusters() { | |||
if (cluster->IsKnownShape()) { | |||
ordered_cluster_.push_back(cluster); | |||
} | |||
for (const auto &out_cluster : cluster->Outputs()) { | |||
for (auto out_cluster : cluster->Outputs()) { | |||
if (cluster_pending_count[out_cluster] > 0 && --cluster_pending_count[out_cluster] == 0) { | |||
ready_clusters.push(out_cluster); | |||
} | |||
@@ -273,16 +273,16 @@ static std::string ToString(const std::vector<ClusterPtr> &clusters) { | |||
Status DynamicShapePartitioner::MergeClusters() { | |||
// Merge unknown shape clusters | |||
for (const auto &cluster : ordered_cluster_) { | |||
for (const auto &in_cluster : cluster->Inputs()) { | |||
for (auto cluster : ordered_cluster_) { | |||
for (auto in_cluster : cluster->Inputs()) { | |||
if (!in_cluster->IsUnknownShape()) { | |||
continue; | |||
} | |||
auto merged_clusters = cluster->MergeAllPathFrom(in_cluster); | |||
GELOGD("Merge all path cluster from %lu to %lu %s.", in_cluster->Id(), cluster->Id(), | |||
ToString(merged_clusters).c_str()); | |||
for (const auto &merged_cluster : merged_clusters) { | |||
for (const auto &node : merged_cluster->Nodes()) { | |||
for (auto merged_cluster : merged_clusters) { | |||
for (auto node : merged_cluster->Nodes()) { | |||
node_2_cluster_[node] = cluster; | |||
} | |||
} | |||
@@ -291,7 +291,7 @@ Status DynamicShapePartitioner::MergeClusters() { | |||
REQUIRE_SUCCESS(TopologicalSortClusters(), "Failed topological sort clusters after merge unknown shape clusters."); | |||
// Merge known shape clusters | |||
for (const auto &cluster : ordered_cluster_) { | |||
for (auto cluster : ordered_cluster_) { | |||
if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) { | |||
auto in_cluster = *(cluster->Inputs().begin()); | |||
in_cluster->Merge(cluster); | |||
@@ -299,13 +299,13 @@ Status DynamicShapePartitioner::MergeClusters() { | |||
continue; | |||
} | |||
for (const auto &in_cluster : cluster->Inputs()) { | |||
for (auto in_cluster : cluster->Inputs()) { | |||
if (!in_cluster->IsKnownShape()) { | |||
continue; | |||
} | |||
if (cluster->TryMerge(in_cluster)) { | |||
GELOGD("Success merge known shape cluster from %lu to %lu.", in_cluster->Id(), cluster->Id()); | |||
for (const auto &node : in_cluster->Nodes()) { | |||
for (auto node : in_cluster->Nodes()) { | |||
node_2_cluster_[node] = cluster; | |||
} | |||
} | |||
@@ -333,7 +333,7 @@ Status DynamicShapePartitioner::CollectSpreadUnknownShapeNodes(NodePtr node) { | |||
if (IsUnknownShapeTensor(out_tensor)) { | |||
GELOGD("Collect node %s as unknown as output %lu is unknown.", node->GetName().c_str(), anchor_index); | |||
is_unknown = true; | |||
auto anchor = node->GetOutDataAnchor(static_cast<int>(anchor_index)); | |||
auto anchor = node->GetOutDataAnchor(anchor_index); | |||
for (const auto peer_anchor : anchor->GetPeerInDataAnchors()) { | |||
if (peer_anchor != nullptr) { | |||
GELOGD("Collect node %s as has unknown input from %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(), | |||
@@ -349,7 +349,7 @@ Status DynamicShapePartitioner::CollectSpreadUnknownShapeNodes(NodePtr node) { | |||
if (IsUnknownShapeTensor(in_tensor)) { | |||
GELOGD("Collect node %s as unknown as input %lu is unknown.", node->GetName().c_str(), anchor_index); | |||
is_unknown = true; | |||
auto anchor = node->GetInDataAnchor(static_cast<int>(anchor_index)); | |||
auto anchor = node->GetInDataAnchor(anchor_index); | |||
const auto peer_anchor = anchor->GetPeerOutAnchor(); | |||
if (peer_anchor != nullptr) { | |||
GELOGD("Collect node %s as has unknown output to %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(), | |||
@@ -453,15 +453,15 @@ std::string Cluster::DebugString() const { | |||
} | |||
ss << "[" << id_ << "](size:" << nodes_.size() << ")"; | |||
ss << "(" << min_ << "," << max_ << ")("; | |||
for (const auto &cluster : in_clusters_) { | |||
for (auto cluster : in_clusters_) { | |||
ss << cluster->id_ << ","; | |||
} | |||
ss << ")->("; | |||
for (const auto &cluster : out_clusters_) { | |||
for (auto cluster : out_clusters_) { | |||
ss << cluster->id_ << ","; | |||
} | |||
ss << ")|"; | |||
for (const auto &node : nodes_) { | |||
for (auto node : nodes_) { | |||
ss << (node->GetName() + "|"); | |||
} | |||
return ss.str(); | |||
@@ -507,12 +507,12 @@ void Cluster::Merge(ClusterPtr other) { | |||
in_clusters_.erase(other); | |||
out_clusters_.erase(other); | |||
auto in_clusters = other->in_clusters_; | |||
for (const auto &cluster : in_clusters) { | |||
for (auto cluster : in_clusters) { | |||
cluster->RemoveOutput(other); | |||
cluster->AddOutput(shared_from_this()); | |||
} | |||
auto out_clusters = other->out_clusters_; | |||
for (const auto &cluster : out_clusters) { | |||
for (auto cluster : out_clusters) { | |||
cluster->RemoveInput(other); | |||
cluster->AddInput(shared_from_this()); | |||
} | |||
@@ -529,7 +529,7 @@ bool Cluster::TryMerge(ClusterPtr other) { | |||
while (!forward_reached.empty()) { | |||
auto current_cluster = forward_reached.front(); | |||
forward_reached.pop(); | |||
for (const auto &cluster : current_cluster->out_clusters_) { | |||
for (auto cluster : current_cluster->out_clusters_) { | |||
if (cluster->max_ == max_ && current_cluster != other) { | |||
return false; | |||
} else if (cluster->min_ < max_) { | |||
@@ -557,7 +557,7 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) { | |||
while (!forward_reached_queue.empty()) { | |||
auto current_cluster = forward_reached_queue.front(); | |||
forward_reached_queue.pop(); | |||
for (const auto &cluster : current_cluster->out_clusters_) { | |||
for (auto cluster : current_cluster->out_clusters_) { | |||
if (cluster->min_ < max_ && cluster->max_ != max_ && forward_reached_clusters.count(cluster) == 0) { | |||
forward_reached_clusters.insert(cluster); | |||
forward_reached_queue.push(cluster); | |||
@@ -567,7 +567,7 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) { | |||
while (!backward_reached_queue.empty()) { | |||
auto current_cluster = backward_reached_queue.front(); | |||
backward_reached_queue.pop(); | |||
for (const auto &cluster : current_cluster->in_clusters_) { | |||
for (auto cluster : current_cluster->in_clusters_) { | |||
if (cluster->max_ > other->min_ && cluster->max_ != other->max_ && | |||
backward_reached_clusters.count(cluster) == 0) { | |||
backward_reached_clusters.insert(cluster); | |||
@@ -578,7 +578,7 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) { | |||
} | |||
} | |||
} | |||
for (const auto &cluster : path_clusters) { | |||
for (auto cluster : path_clusters) { | |||
Merge(cluster); | |||
} | |||
return path_clusters; | |||
@@ -598,11 +598,11 @@ void Cluster::AddFrameOutput(OutDataAnchorPtr anchor) { | |||
}; | |||
InDataAnchorPtr Cluster::GetFrameInDataAnchor(InDataAnchorPtr anchor) { | |||
return partition_node_->GetInDataAnchor(static_cast<int>(inputs_index_[anchor])); | |||
return partition_node_->GetInDataAnchor(inputs_index_[anchor]); | |||
}; | |||
OutDataAnchorPtr Cluster::GetFrameOutDataAnchor(OutDataAnchorPtr anchor) { | |||
return partition_node_->GetOutDataAnchor(static_cast<int>(outputs_index_[anchor])); | |||
return partition_node_->GetOutDataAnchor(outputs_index_[anchor]); | |||
}; | |||
InControlAnchorPtr Cluster::GetFrameInControlAnchor() { return partition_node_->GetInControlAnchor(); }; | |||
@@ -616,25 +616,22 @@ Status Cluster::BuildFrame() { | |||
auto node = nodes_.front(); | |||
auto in_control_anchor = node->GetInControlAnchor(); | |||
if (in_control_anchor != nullptr) { | |||
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||
for (auto peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||
auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()]; | |||
if (src_cluster->id_ != id_) { | |||
REQUIRE_GRAPH_SUCCESS( | |||
GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), | |||
"Failed remove edge from node %s index %d to node %s index %d.", | |||
peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(peer_out_control_anchor), | |||
in_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(in_control_anchor)); | |||
auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()]; | |||
GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor); | |||
control_inputs_.insert(src_cluster); | |||
src_cluster->control_outputs_.insert(peer_out_control_anchor); | |||
} | |||
} | |||
} | |||
if (IsData()) { | |||
for (const auto &anchor : node->GetAllOutDataAnchors()) { | |||
for (auto anchor : node->GetAllOutDataAnchors()) { | |||
AddFrameOutput(anchor); | |||
} | |||
} else { | |||
for (const auto &anchor : node->GetAllInDataAnchors()) { | |||
for (auto anchor : node->GetAllInDataAnchors()) { | |||
AddFrameInput(anchor); | |||
} | |||
} | |||
@@ -663,7 +660,7 @@ Status Cluster::BuildPartitionFrame() { | |||
"Failed set shape flag."); | |||
REQUIRE_GRAPH_SUCCESS(GraphUtils::RemoveJustNode(graph, node), "Failed remove root graph node."); | |||
REQUIRE_GRAPH_SUCCESS(node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph."); | |||
for (const auto &anchor : node->GetAllInDataAnchors()) { | |||
for (auto anchor : node->GetAllInDataAnchors()) { | |||
auto peer_out_anchor = anchor->GetPeerOutAnchor(); | |||
if (peer_out_anchor == nullptr) { | |||
continue; // Skip overhang input. | |||
@@ -677,7 +674,7 @@ Status Cluster::BuildPartitionFrame() { | |||
} | |||
auto in_control_anchor = node->GetInControlAnchor(); | |||
if (in_control_anchor != nullptr) { | |||
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||
for (auto peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||
if (peer_out_control_anchor == nullptr) { | |||
continue; | |||
} | |||
@@ -692,9 +689,9 @@ Status Cluster::BuildPartitionFrame() { | |||
} | |||
} | |||
} | |||
for (const auto &anchor : node->GetAllOutDataAnchors()) { | |||
for (auto anchor : node->GetAllOutDataAnchors()) { | |||
auto peer_in_anchors = anchor->GetPeerInDataAnchors(); | |||
for (const auto &peer_in_anchor : peer_in_anchors) { | |||
for (auto peer_in_anchor : peer_in_anchors) { | |||
auto src_cluster = partitioner_->node_2_cluster_[peer_in_anchor->GetOwnerNode()]; | |||
if (src_cluster->id_ != id_) { | |||
AddFrameOutput(anchor); | |||
@@ -720,7 +717,7 @@ Status Cluster::BuildPartitionFrame() { | |||
} | |||
Status Cluster::CombinePartitionFrame() { | |||
for (const auto &anchor : inputs_) { | |||
for (auto anchor : inputs_) { | |||
auto peer_out_anchor = anchor->GetPeerOutAnchor(); | |||
auto src_cluster = partitioner_->node_2_cluster_[peer_out_anchor->GetOwnerNode()]; | |||
auto src_anchor = src_cluster->GetFrameOutDataAnchor(peer_out_anchor); | |||
@@ -732,7 +729,7 @@ Status Cluster::CombinePartitionFrame() { | |||
src_anchor->GetOwnerNode()->GetName().c_str(), src_anchor->GetIdx(), | |||
dst_anchor->GetOwnerNode()->GetName().c_str(), dst_anchor->GetIdx()); | |||
} | |||
for (const auto &src_cluster : control_inputs_) { | |||
for (auto src_cluster : control_inputs_) { | |||
auto src_anchor = src_cluster->GetFrameOutControlAnchor(); | |||
auto dst_anchor = GetFrameInControlAnchor(); | |||
REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(src_anchor, dst_anchor), "Failed add edge from %s:%d to %s:%d.", | |||
@@ -777,8 +774,8 @@ Status Cluster::BuildPartitionSubgraph() { | |||
REQUIRE_NOT_NULL(net_output_node, "Failed add netoutput node to subgraph."); | |||
REQUIRE_GRAPH_SUCCESS(net_output_node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph of netoutput node."); | |||
parent_node_index = 0; | |||
for (const auto &anchor : outputs_) { | |||
auto output_desc = anchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(static_cast<uint32_t>(anchor->GetIdx())); | |||
for (auto anchor : outputs_) { | |||
auto output_desc = anchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(anchor->GetIdx()); | |||
REQUIRE(AttrUtils::SetInt(output_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_node_index), | |||
"Failed set parent_node_index on subgraph netoutput's input."); | |||
REQUIRE_GRAPH_SUCCESS(net_output_op->UpdateInputDesc(parent_node_index, output_desc), | |||
@@ -789,7 +786,7 @@ Status Cluster::BuildPartitionSubgraph() { | |||
anchor->GetIdx()); | |||
parent_node_index++; | |||
} | |||
for (const auto &anchor : control_outputs_) { | |||
for (auto anchor : control_outputs_) { | |||
REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(anchor, net_output_node->GetInControlAnchor()), | |||
"Faile add control edge from %s:%d to netoutput node.", | |||
anchor->GetOwnerNode()->GetName().c_str(), anchor->GetIdx()); | |||
@@ -38,7 +38,6 @@ Status EnginePlacer::Run() { | |||
return FAILED; | |||
} | |||
// Assign engine for each node in the graph | |||
instance_ptr->DNNEngineManagerObj().InitPerformanceStaistic(); | |||
for (const auto &node_ptr : compute_graph_->GetDirectNode()) { | |||
GE_CHECK_NOTNULL(node_ptr); | |||
GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | |||
@@ -61,15 +60,12 @@ Status EnginePlacer::Run() { | |||
return FAILED; | |||
} | |||
} | |||
for (auto &it : instance_ptr->DNNEngineManagerObj().GetCheckSupportCost()) { | |||
GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second); | |||
} | |||
GELOGI("Engine placer ends."); | |||
return SUCCESS; | |||
} | |||
Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::string &engine_name) { | |||
if ((node_ptr == nullptr) || (node_ptr->GetOpDesc() == nullptr)) { | |||
if (node_ptr == nullptr || node_ptr->GetOpDesc() == nullptr) { | |||
GELOGE(FAILED, "node_ptr is null."); | |||
return FAILED; | |||
} | |||
@@ -25,7 +25,6 @@ | |||
#include "framework/common/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/manager/graph_manager_utils.h" | |||
#include "graph/common/ge_call_wrapper.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/type_utils.h" | |||
@@ -232,33 +231,33 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co | |||
ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>(original_compute_graph->GetName()); | |||
GE_CHECK_NOTNULL(new_sub_graph); | |||
output_merged_compute_graph = new_sub_graph; | |||
GE_TIMESTAMP_START(MergeSubGraphRemoveNode); | |||
GE_TIMESTAMP_START(MergeGraphRemoveNode); | |||
if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) { | |||
GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed"); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_END(MergeSubGraphRemoveNode, "GraphPartitioner::MergeGraphRemoveNodeAndEdge"); | |||
GE_TIMESTAMP_START(MergeSubGraphTopologicalSorting); | |||
GE_TIMESTAMP_END(MergeGraphRemoveNode, "GraphPartitioner::MergeGraphRemoveNodeAndEdge"); | |||
GE_TIMESTAMP_START(MergeGraphTopologicalSorting); | |||
Status ret = output_merged_compute_graph->TopologicalSorting(); | |||
if (ret != SUCCESS) { | |||
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[GraphPartitioner]: output_merged_compute_graph->TopologicalSorting failed"); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_END(MergeSubGraphTopologicalSorting, "GraphPartitioner::MergeGraphTopologicalSorting"); | |||
GE_TIMESTAMP_END(MergeGraphTopologicalSorting, "GraphPartitioner::MergeGraphTopologicalSorting"); | |||
// flush all nodes' engine of merged graph | |||
GE_TIMESTAMP_START(MergeSubGraphEnginePlacerRun); | |||
GE_TIMESTAMP_START(MergeGraphEnginePlacerRun); | |||
graph_info_.engine_placer_.SetComputeGraph(output_merged_compute_graph); | |||
if (graph_info_.engine_placer_.Run() != SUCCESS) { | |||
GELOGE(GE_GRAPH_INIT_FAILED, "[GraphPartitioner]: engine_placer run failed"); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_END(MergeSubGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun"); | |||
GE_TIMESTAMP_END(MergeGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun"); | |||
GELOGI("Graph merge ends."); | |||
return SUCCESS; | |||
} | |||
Status ge::GraphPartitioner::UpdatePldOpDesc(const NodePtr &dst_node, int input_index, OpDescPtr &pld_op_desc) { | |||
if ((dst_node == nullptr) || (pld_op_desc == nullptr) || (dst_node->GetOpDesc() == nullptr)) { | |||
if (dst_node == nullptr || pld_op_desc == nullptr || dst_node->GetOpDesc() == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return FAILED; | |||
} | |||
@@ -276,7 +275,7 @@ Status ge::GraphPartitioner::UpdatePldOpDesc(const NodePtr &dst_node, int input_ | |||
} | |||
Status ge::GraphPartitioner::UpdateEndOpDesc(const NodePtr &src_node, int output_index, OpDescPtr &end_op_desc) { | |||
if ((src_node == nullptr) || (end_op_desc == nullptr) || (src_node->GetOpDesc() == nullptr)) { | |||
if (src_node == nullptr || end_op_desc == nullptr || src_node->GetOpDesc() == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return FAILED; | |||
} | |||
@@ -297,9 +296,9 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
const AnchorPtr &peer_in_anchor, | |||
const ge::ComputeGraphPtr &pld_graph, | |||
const ge::ComputeGraphPtr &end_graph) { | |||
GE_CHECK_NOTNULL(out_anchor); | |||
GE_CHECK_NOTNULL(peer_in_anchor); | |||
GE_CHECK_NOTNULL(pld_graph); | |||
GE_CHECK_NOTNULL(out_anchor); | |||
GE_CHECK_NOTNULL(end_graph); | |||
const auto &src_node = out_anchor->GetOwnerNode(); | |||
const auto &dst_node = peer_in_anchor->GetOwnerNode(); | |||
@@ -314,7 +313,6 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
GELOGW("SetInt peerIndex failed");) | |||
GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, "parentOpType", dst_node->GetType()), | |||
GELOGW("SetStr parentOpType failed");) | |||
GE_IF_BOOL_EXEC(!end_op_desc->SetExtAttr("parentNode", dst_node), GELOGW("SetEndExtAttr parentNode failed");) | |||
// replace input_desc of end with owner node's desc | |||
int output_index = ge::AnchorUtils::GetIdx(out_anchor); | |||
bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); | |||
@@ -363,7 +361,6 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
GELOGW("SetStr parentId failed");) | |||
GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "anchorIndex", AnchorUtils::GetIdx(out_anchor)), | |||
GELOGW("SetInt anchorIndex failed");) | |||
GE_IF_BOOL_EXEC(!pld_op_desc->SetExtAttr("parentNode", src_node), GELOGW("SetPldExtAttr parentNode failed");) | |||
// do not care over flow | |||
graph_info_.num_of_pld_end_++; | |||
// replace output_desc of pld with input node's output desc | |||
@@ -398,14 +395,14 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
return FAILED; | |||
} | |||
graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node; | |||
graph_info_.pld_2_end_[new_pld_node] = new_end_node; | |||
graph_info_.end_2_pld_[new_end_node] = new_pld_node; | |||
graph_info_.pld_2_end_[new_pld_node] = new_end_node; | |||
return SUCCESS; | |||
} | |||
Status ge::GraphPartitioner::LinkInput2EndRemoveOrginalLink(ge::NodePtr input_node, ge::ComputeGraphPtr src_graph, | |||
ge::ComputeGraphPtr dst_graph) { | |||
if ((input_node == nullptr) || (src_graph == nullptr) || (dst_graph == nullptr)) { | |||
if (input_node == nullptr || src_graph == nullptr || dst_graph == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return FAILED; | |||
} | |||
@@ -445,7 +442,7 @@ Status ge::GraphPartitioner::LinkInput2EndRemoveOrginalLink(ge::NodePtr input_no | |||
Status ge::GraphPartitioner::PutInputNodesInSubGraph(const ge::ComputeGraphPtr &src_graph, | |||
const ge::ComputeGraphPtr &dst_graph) { | |||
if ((src_graph == nullptr) || (dst_graph == nullptr)) { | |||
if (src_graph == nullptr || dst_graph == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return FAILED; | |||
} | |||
@@ -852,34 +849,34 @@ Status ge::GraphPartitioner::PartitionSubGraph(ge::ComputeGraphPtr compute_graph | |||
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[GraphPartitioner]: subGraphPtr->TopologicalSorting failed"); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_START(PartitionSubGraphInitialize); | |||
GE_TIMESTAMP_START(GraphPartitionInitialize); | |||
if (Initialize(compute_graph) != SUCCESS) { | |||
GELOGE(GE_GRAPH_INIT_FAILED, "[GraphPartitioner]: initialize failed"); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_END(PartitionSubGraphInitialize, "GraphPartitioner::PartitionInitialize"); | |||
GE_TIMESTAMP_START(PartitionSubGraphMarkClusters); | |||
GE_TIMESTAMP_END(GraphPartitionInitialize, "GraphPartitioner::PartitionInitialize"); | |||
GE_TIMESTAMP_START(GraphPartitionMarkClusters); | |||
MarkClusters(); | |||
GE_TIMESTAMP_END(PartitionSubGraphMarkClusters, "GraphPartitioner::PartitionMarkClusters"); | |||
GE_TIMESTAMP_START(PartitionSubGraphSplitSubGraphs); | |||
GE_TIMESTAMP_END(GraphPartitionMarkClusters, "GraphPartitioner::PartitionMarkClusters"); | |||
GE_TIMESTAMP_START(GraphPartitionSplitSubGraphs); | |||
if (SplitSubGraphs(compute_graph) != SUCCESS) { | |||
GELOGE(FAILED, "[GraphPartitioner]: SplitSubGraphs failed"); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_END(PartitionSubGraphSplitSubGraphs, "GraphPartitioner::PartitionSplitSubGraphs"); | |||
GE_TIMESTAMP_START(PartitionSubGraphSortSubGraphs); | |||
GE_TIMESTAMP_END(GraphPartitionSplitSubGraphs, "GraphPartitioner::PartitionSplitSubGraphs"); | |||
GE_TIMESTAMP_START(GraphPartitionSortSubGraphs); | |||
if (SortSubGraphs(compute_graph) != ge::SUCCESS) { | |||
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "Graph Partition SortSubGraphs failed."); | |||
return ge::FAILED; | |||
} | |||
GE_TIMESTAMP_END(PartitionSubGraphSortSubGraphs, "GraphPartitioner::PartitionSortSubGraphs"); | |||
GE_TIMESTAMP_START(PartitionSubGraphAddPartitionsToGraphNode); | |||
GE_TIMESTAMP_END(GraphPartitionSortSubGraphs, "GraphPartitioner::PartitionSortSubGraphs"); | |||
GE_TIMESTAMP_START(GraphPartitionAddPartitionsToGraphNode); | |||
vector<ge::SubGraphInfoPtr> output_subgraphs; | |||
if (AddPartitionsToGraphNode(output_subgraphs, compute_graph) != ge::SUCCESS) { | |||
GELOGE(GE_GRAPH_EMPTY_PARTITION, "Graph Partition AddPartitionsToGraphNode failed."); | |||
return ge::FAILED; | |||
} | |||
GE_TIMESTAMP_END(PartitionSubGraphAddPartitionsToGraphNode, "GraphPartitioner::PartitionAddPartitionsToGraphNode"); | |||
GE_TIMESTAMP_END(GraphPartitionAddPartitionsToGraphNode, "GraphPartitioner::PartitionAddPartitionsToGraphNode"); | |||
GELOGI("Graph Partition ends. Adding partitions to SubGraphInfo, got %zu sub graphs", output_subgraphs.size()); | |||
graph_info_.mode_ = kMerging; | |||
// do not care over flow | |||
@@ -926,7 +923,7 @@ Status ge::GraphPartitioner::AddPlaceHolderEnd(const AnchorPtr &out_anchor, cons | |||
Status ge::GraphPartitioner::SortSubGraphs(const ge::ComputeGraphPtr &compute_graph) { | |||
uint32_t rank = kRankOne; // rank 0 for data graph | |||
ComputeGraphPtr new_input_nodes_sub_graph = MakeShared<ComputeGraph>("inputNodeGraph"); | |||
if ((new_input_nodes_sub_graph == nullptr) || (compute_graph == nullptr)) { | |||
if (new_input_nodes_sub_graph == nullptr || compute_graph == nullptr) { | |||
GELOGE(FAILED, "[GraphPartitioner]: new_input_nodes_sub_graph or compute_graph is null."); | |||
return FAILED; | |||
} | |||
@@ -968,7 +965,7 @@ Status ge::GraphPartitioner::SortSubGraphs(const ge::ComputeGraphPtr &compute_gr | |||
} | |||
AnchorPtr ge::GraphPartitioner::GetEndInAnchor(const AnchorPtr &src_anchor, const NodePtr &end_node) { | |||
if ((src_anchor == nullptr) || (end_node == nullptr)) { | |||
if (src_anchor == nullptr || end_node == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return nullptr; | |||
} | |||
@@ -982,7 +979,7 @@ AnchorPtr ge::GraphPartitioner::GetEndInAnchor(const AnchorPtr &src_anchor, cons | |||
} | |||
AnchorPtr ge::GraphPartitioner::GetPldOutAnchor(const NodePtr &pld_node, const AnchorPtr &dst_anchor) { | |||
if ((pld_node == nullptr) || (dst_anchor == nullptr)) { | |||
if (pld_node == nullptr || dst_anchor == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return nullptr; | |||
} | |||
@@ -995,16 +992,16 @@ AnchorPtr ge::GraphPartitioner::GetPldOutAnchor(const NodePtr &pld_node, const A | |||
return pld_out_anchor; | |||
} | |||
void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPtr &subgraph_info) { | |||
if (subgraph_info == nullptr) { | |||
void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPtr &sub_graph_info) { | |||
if (sub_graph_info == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return; | |||
} | |||
auto subgraph = subgraph_info->GetSubGraph(); | |||
GE_CHECK_NOTNULL_JUST_RETURN(subgraph); | |||
auto sub_graph = sub_graph_info->GetSubGraph(); | |||
GE_CHECK_NOTNULL_JUST_RETURN(sub_graph); | |||
NodetoNodeMap end_map; | |||
NodetoNodeMap pld_map; | |||
for (const auto &node : subgraph->GetDirectNode()) { | |||
for (const auto &node : sub_graph->GetDirectNode()) { | |||
if (node->GetType() == kEndType) { | |||
end_map[node] = graph_info_.end_2_pld_.at(node); | |||
} | |||
@@ -1012,8 +1009,8 @@ void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPt | |||
pld_map[node] = graph_info_.pld_2_end_.at(node); | |||
} | |||
} | |||
subgraph_info->SetEnd2PldMap(end_map); | |||
subgraph_info->SetPld2EndMap(pld_map); | |||
sub_graph_info->SetEnd2PldMap(end_map); | |||
sub_graph_info->SetPld2EndMap(pld_map); | |||
} | |||
const Graph2SubGraphInfoList &ge::GraphPartitioner::GetSubGraphMap() { return graph_2_subgraph_list_; } | |||
@@ -22,12 +22,16 @@ | |||
#include <sstream> | |||
#include <vector> | |||
#include "framework/common/debug/ge_log.h" | |||
#include "common/ge_inner_error_codes.h" | |||
#include "common/ge/ge_util.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "init/gelib.h" | |||
namespace { | |||
bool is_loop_graph = false; | |||
} | |||
namespace ge { | |||
namespace { | |||
bool GraphShouldBeSkip(const ge::ComputeGraphPtr &graph) { | |||
@@ -40,6 +44,7 @@ bool GraphShouldBeSkip(const ge::ComputeGraphPtr &graph) { | |||
} // namespace | |||
Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { | |||
GE_TIMESTAMP_START(AtomicAddrCleanPass); | |||
if (graph == nullptr) { | |||
GELOGE(PARAM_INVALID, "param [graph] must not be null."); | |||
return PARAM_INVALID; | |||
@@ -66,10 +71,10 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { | |||
} | |||
atomic_node_vec.push_back(node); | |||
} | |||
if (!is_loop_graph_ && node->GetType() == LOOPCOND) { | |||
if (!is_loop_graph && node->GetType() == LOOPCOND) { | |||
// there is loop in this graph | |||
GELOGD("There is no loop node. It will insert clean node follow atomic node."); | |||
is_loop_graph_ = true; | |||
is_loop_graph = true; | |||
} | |||
} | |||
if (atomic_node_vec.empty()) { | |||
@@ -78,7 +83,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { | |||
} | |||
// 2.Insert clean node and link to atomic node | |||
Status ret; | |||
if (is_loop_graph_) { | |||
if (is_loop_graph) { | |||
ret = HandleLoopGraph(graph, atomic_node_vec); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
@@ -90,6 +95,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { | |||
} | |||
} | |||
GELOGD("AtomicAddrCleanPass end."); | |||
GE_TIMESTAMP_END(AtomicAddrCleanPass, "GraphManager::AtomicAddrCleanPass"); | |||
return SUCCESS; | |||
} | |||
@@ -166,14 +172,12 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { | |||
if (!session_graph_id.empty()) { | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | |||
} | |||
string node_name = op_desc->GetName(); | |||
// Only flush subgraph name | |||
if (graph->GetParentGraph() != nullptr) { | |||
node_name = graph->GetName() + "_" + node_name; | |||
} | |||
string node_name = (graph->GetParentGraph() != nullptr) | |||
? (graph->GetName() + "_" + op_desc->GetName() + session_graph_id) | |||
: (op_desc->GetName() + session_graph_id); | |||
string name = node_name + session_graph_id; | |||
op_desc->SetName(name); | |||
op_desc->SetName(node_name); | |||
GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str()); | |||
// To avoid same name between graphs, set session graph id to this node | |||
NodePtr clean_addr_node = graph->AddNodeFront(op_desc); | |||
@@ -199,7 +203,7 @@ Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr | |||
} | |||
GELOGD("Graph add cleanAddrNode op out ctrl edge, dst node: %s.", atomic_node->GetName().c_str()); | |||
std::string stream_label; | |||
if (is_loop_graph_ && AttrUtils::GetStr(atomic_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) { | |||
if (is_loop_graph && AttrUtils::GetStr(atomic_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) { | |||
if (!AttrUtils::SetStr(atomic_clean_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) { | |||
GELOGW("LinkToAtomicNode: SetStr failed"); | |||
return INTERNAL_ERROR; | |||
@@ -258,7 +262,7 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { | |||
return true; | |||
} | |||
/// | |||
/// @brief Clear Status, used for subgraph pass | |||
/// @brief Clear Status, used for subgraph pass
/// @return SUCCESS | |||
/// | |||
Status AtomicAddrCleanPass::ClearStatus() { | |||
@@ -75,7 +75,6 @@ class AtomicAddrCleanPass : public GraphPass { | |||
bool IsAtomicOp(const NodePtr &node); | |||
vector<NodePtr> hcom_node_vec_; | |||
bool is_loop_graph_ = false; | |||
}; | |||
} // namespace ge | |||
@@ -1,319 +0,0 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "graph/passes/attach_stream_label_pass.h" | |||
#include "ge/ge_api_types.h" | |||
#include "graph/common/omg_util.h" | |||
namespace ge { | |||
Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) {
  GELOGD("AttachStreamLabelPass Enter.");
  // Collect StreamSwitch / StreamMerge / Enter nodes and the branch-head map.
  FindNodes(graph);
  // Attach a stream label to every candidate node that does not carry one yet.
  for (const auto &node : need_label_nodes_) {
    const OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    if (op_desc->HasAttr(ATTR_NAME_STREAM_LABEL)) {
      continue;  // Already labelled; nothing to update for this node.
    }
    GE_CHK_STATUS_RET(UpdateCondBranch(node), "Update cond branch failed, start node:%s.", node->GetName().c_str());
  }
  // Propagate labels starting from the collected Enter nodes.
  GE_CHK_STATUS_RET(UpdateEnterNode(), "UpdateEnterNode failed.");
  GELOGD("AttachStreamLabelPass Leave.");
  return SUCCESS;
}
/// | |||
/// @brief Clear Status, used for subgraph pass | |||
/// @return | |||
/// | |||
Status AttachStreamLabelPass::ClearStatus() {
  // Reset all per-graph bookkeeping so the pass instance can be reused.
  branch_head_nodes_.clear();
  enter_nodes_.clear();
  need_label_nodes_.clear();
  stream_switch_nodes_.clear();
  return SUCCESS;
}
/// | |||
/// @brief Find StreamSwitch / StreamMerge / Enter node | |||
/// @param [in] graph | |||
/// @return void | |||
/// | |||
void AttachStreamLabelPass::FindNodes(const ComputeGraphPtr &graph) { | |||
for (const NodePtr &node : graph->GetDirectNode()) { | |||
const std::string &type = node->GetType(); | |||
if (type == STREAMSWITCH) { | |||
stream_switch_nodes_.emplace_back(node); | |||
} else if (type == STREAMMERGE) { | |||
if ((node->GetOpDesc() != nullptr) && !node->GetOpDesc()->HasAttr(ATTR_NAME_NEXT_ITERATION)) { | |||
need_label_nodes_.emplace_back(node); | |||
} | |||
} else if ((type == ENTER) || (type == REFENTER)) { | |||
enter_nodes_.emplace_back(node); | |||
} | |||
} | |||
for (const auto &node : stream_switch_nodes_) { | |||
for (const auto &out_ctrl_node : node->GetOutControlNodes()) { | |||
MarkHeadNodes(out_ctrl_node, node); | |||
} | |||
need_label_nodes_.emplace_back(node); | |||
} | |||
} | |||
/// | |||
/// @brief Mark node as head_node of stream_switch | |||
/// @param [in] node | |||
/// @param [in] stream_switch | |||
/// @return void | |||
/// | |||
void AttachStreamLabelPass::MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch) { | |||
static const std::set<std::string> bypass_type_set = {IDENTITY, IDENTITYN, CAST, TRANSDATA, | |||
TRANSPOSE, TRANSPOSED, RESHAPE}; | |||
std::stack<NodePtr> nodes; | |||
nodes.push(node); | |||
std::set<NodePtr> visited; | |||
while (!nodes.empty()) { | |||
NodePtr cur_node = nodes.top(); | |||
nodes.pop(); | |||
if (visited.count(cur_node) > 0) { | |||
continue; | |||
} | |||
GELOGD("branch_head_node %s of stream_switch %s.", cur_node->GetName().c_str(), stream_switch->GetName().c_str()); | |||
branch_head_nodes_[cur_node] = stream_switch; | |||
if (bypass_type_set.count(cur_node->GetType()) > 0) { | |||
for (const auto &out_node : cur_node->GetOutAllNodes()) { | |||
nodes.push(out_node); | |||
} | |||
} | |||
visited.insert(cur_node); | |||
} | |||
} | |||
/// | |||
/// @brief update cond branch | |||
/// @param [in] node | |||
/// @return Status | |||
/// | |||
Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
  // Traverse the cond branch rooted at `node`, collecting downstream nodes and
  // deriving the stream label (filled in by AttachFlag along the way).
  std::string stream_label;
  std::unordered_set<NodePtr> branch_nodes;
  std::unordered_set<NodePtr> visited;
  std::stack<NodePtr> nodes;
  nodes.push(node);
  // Traversal stops when it reaches one of these op types.
  static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE};
  bool merge_flag = false;
  bool exit_flag = false;
  bool net_output_flag = false;
  while (!nodes.empty()) {
    NodePtr cur_node = nodes.top();
    nodes.pop();
    if (visited.count(cur_node) > 0) {
      continue;
    }
    // Updates stream_label / merge_flag / exit_flag / net_output_flag
    // according to cur_node's type.
    if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) {
      GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str());
      return FAILED;
    }
    const std::string &type = cur_node->GetType();
    for (const auto &out_node : cur_node->GetOutAllNodes()) {
      const std::string &out_type = out_node->GetType();
      // Stop expanding at end-type nodes, at heads that belong to a different
      // StreamSwitch, and past (Ref)Enter nodes except via STREAMACTIVE.
      bool stop_flag = (end_type_set.count(out_type) > 0) ||
                       ((branch_head_nodes_.count(out_node) > 0) && (branch_head_nodes_[out_node] != node)) ||
                       (((type == ENTER) || (type == REFENTER)) && (out_type != STREAMACTIVE));
      if (!stop_flag) {
        nodes.push(out_node);
        GELOGD("Insert branch node %s.", out_node->GetName().c_str());
        branch_nodes.insert(out_node);
      }
    }
    visited.insert(cur_node);
  }
  if (node->GetType() == STREAMSWITCH) {
    GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed.");
  }
  // If the branch reached a Merge/Exit AND a NetOutput, labelling stops here.
  bool attach_flag = (merge_flag || exit_flag) && net_output_flag;
  if (attach_flag) {
    GELOGI("No need to keep on attaching label.");
    return SUCCESS;
  }
  // Otherwise attach the derived label to every collected branch node.
  for (const NodePtr &tmp_node : branch_nodes) {
    GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str());
    GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed.");
  }
  return SUCCESS;
}
/// | |||
/// @brief attach flag | |||
/// @param [in] node | |||
/// @param [out] stream_label | |||
/// @param [out] merge_flag | |||
/// @param [out] exit_flag | |||
/// @param [out] net_output_flag | |||
/// @return Status | |||
/// | |||
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag,
                                         bool &exit_flag, bool &net_output_flag) {
  // Sets the out-flags / stream_label according to node type; types not
  // handled below leave all outputs untouched.
  const std::string &type = node->GetType();
  if (type == STREAMSWITCH) {
    if (node->GetInDataNodes().empty()) {
      GELOGE(INTERNAL_ERROR, "node %s has no input_data_node.", node->GetName().c_str());
      return INTERNAL_ERROR;
    }
    // Label is derived from the switch's first data input; note the node's own
    // label is set BEFORE the _t/_f branch suffix is appended.
    stream_label = node->GetInDataNodes().at(0)->GetName();
    GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
    bool value = false;
    OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    GE_CHK_BOOL_EXEC(AttrUtils::GetBool(op_desc, ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG, value), return FAILED,
                     "StreamSwitch get attr TRUE_BRANCH_STREAM failed.");
    // Suffix distinguishes the true branch from the false branch.
    stream_label += (value ? "_t" : "_f");
  } else if (type == STREAMMERGE) {
    stream_label = node->GetName();
    GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
    merge_flag = true;
  } else if ((type == EXIT) || (type == REFEXIT)) {
    // Exit nodes reuse the label accumulated so far by the caller's traversal.
    GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
    exit_flag = true;
  } else if (type == NETOUTPUT) {
    net_output_flag = true;
  }
  return SUCCESS;
}
/// | |||
/// @brief Update stream_label start with enter nodes | |||
/// @return Status | |||
/// | |||
Status AttachStreamLabelPass::UpdateEnterNode() {
  // Group enter nodes by the STREAMACTIVE node they control.
  std::unordered_map<NodePtr, std::vector<NodePtr>> enter_active_map;
  for (const auto &enter_node : enter_nodes_) {
    for (const auto &out_ctrl_node : enter_node->GetOutControlNodes()) {
      if (out_ctrl_node->GetType() != STREAMACTIVE) {
        continue;
      }
      auto iter = enter_active_map.find(out_ctrl_node);
      if (iter == enter_active_map.end()) {
        enter_active_map[out_ctrl_node] = {enter_node};
      } else {
        iter->second.emplace_back(enter_node);
      }
    }
  }
  // For each active node: label its enter nodes, then propagate the active
  // label down the loop branch.
  for (const auto &pair : enter_active_map) {
    if (SetEnterLabel(pair.second, pair.first) != SUCCESS) {
      GELOGE(FAILED, "Set stream_label for enter_nodes failed.");
      return FAILED;
    }
    NodePtr active_node = pair.first;
    GE_CHECK_NOTNULL(active_node);
    // The active node is expected to carry exactly one non-empty active label.
    std::vector<std::string> active_label_list;
    if (!AttrUtils::GetListStr(active_node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, active_label_list) ||
        (active_label_list.size() != 1) || active_label_list[0].empty()) {
      GELOGE(INTERNAL_ERROR, "Get attr ATTR_NAME_ACTIVE_LABEL_LIST failed, node: %s.", active_node->GetName().c_str());
      return INTERNAL_ERROR;
    }
    std::stack<NodePtr> enter_nodes;
    for (const auto &enter_node : pair.second) {
      enter_nodes.emplace(enter_node);
    }
    if (UpdateLoopBranch(enter_nodes, active_label_list[0]) != SUCCESS) {
      GELOGE(FAILED, "Update stream_label for loop_branch failed.");
      return FAILED;
    }
  }
  return SUCCESS;
}
/// | |||
/// @brief Set stream_label for enter_nodes | |||
/// @param [in] enter_nodes | |||
/// @param [in] active_node | |||
/// @return Status | |||
/// | |||
Status AttachStreamLabelPass::SetEnterLabel(const std::vector<NodePtr> &enter_nodes, const NodePtr &active_node) {
  // Start from the active node's existing label (may be empty).
  std::string stream_label;
  GE_CHECK_NOTNULL(active_node);
  (void)AttrUtils::GetStr(active_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label);
  // same_flag stays true while every labelled enter node agrees with
  // stream_label (unlabelled enter nodes are ignored).
  bool same_flag = true;
  for (const auto &enter_node : enter_nodes) {
    std::string tmp_label;
    (void)AttrUtils::GetStr(enter_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, tmp_label);
    if (tmp_label.empty() || (stream_label == tmp_label)) {
      continue;
    }
    same_flag = false;
    break;
  }
  if (stream_label.empty()) {
    if (same_flag) {
      // No label anywhere yet: derive one from the active node's name.
      stream_label = active_node->GetName();
    } else {
      // Conflicting enter labels with an unlabelled active node: leave as-is.
      GELOGW("stream_label of enter_active is empty while stream_label of some enter_node is not.");
      return SUCCESS;
    }
  }
  // Apply the resolved label to all enter nodes and the active node itself.
  for (const auto &enter_node : enter_nodes) {
    GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed.");
  }
  GE_CHK_STATUS_RET(SetStreamLabel(active_node, stream_label), "Set stream label failed.");
  return SUCCESS;
}
/// | |||
/// @brief Update stream_label for loop_branch | |||
/// @param [in] enter_nodes | |||
/// @param [in] stream_label | |||
/// @return Status | |||
/// | |||
Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes,
                                               const std::string &stream_label) {
  // Walk downstream from every enter node, labelling unlabelled successors.
  std::stack<NodePtr> pending(enter_nodes);
  while (!pending.empty()) {
    const NodePtr cur = pending.top();
    pending.pop();
    for (const NodePtr &succ : cur->GetOutAllNodes()) {
      OpDescPtr succ_desc = succ->GetOpDesc();
      GE_CHECK_NOTNULL(succ_desc);
      std::string succ_type = succ_desc->GetType();
      // Stop at nodes that already carry a label and at (Ref)Enter nodes.
      if (succ_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (succ_type == ENTER) || (succ_type == REFENTER)) {
        continue;
      }
      GELOGD("Attach label %s to node: %s.", stream_label.c_str(), succ->GetName().c_str());
      GE_CHK_STATUS_RET(SetStreamLabel(succ, stream_label), "Set stream label failed.");
      pending.push(succ);
    }
  }
  return SUCCESS;
}
} // namespace ge |
@@ -1,97 +0,0 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_GRAPH_PASSES_ATTACH_STREAM_LABEL_PASS_H_ | |||
#define GE_GRAPH_PASSES_ATTACH_STREAM_LABEL_PASS_H_ | |||
#include <stack> | |||
#include "inc/graph_pass.h" | |||
namespace ge { | |||
// Graph pass that attaches stream labels to nodes reachable from
// StreamSwitch / StreamMerge / Enter nodes, so downstream stream assignment
// can place cond/loop branches on the right streams.
class AttachStreamLabelPass : public GraphPass {
 public:
  Status Run(ComputeGraphPtr graph);

  ///
  /// @brief Clear Status, used for subgraph pass
  /// @return
  ///
  Status ClearStatus() override;

 private:
  ///
  /// @brief Find StreamSwitch / StreamMerge / Enter node
  /// @param [in] graph
  /// @return void
  ///
  void FindNodes(const ComputeGraphPtr &graph);

  ///
  /// @brief Mark node as head_node of stream_switch
  /// @param [in] node
  /// @param [in] stream_switch
  /// @return void
  ///
  void MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch);

  ///
  /// @brief update cond branch
  /// @param [in] node
  /// @return Status
  ///
  Status UpdateCondBranch(const NodePtr &node);

  ///
  /// @brief attach flag
  /// @param [in] node
  /// @param [out] stream_label
  /// @param [out] merge_flag
  /// @param [out] exit_flag
  /// @param [out] net_output_flag
  /// @return Status
  ///
  static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag,
                           bool &net_output_flag);

  ///
  /// @brief Update stream_label for loop_branch
  /// @param [in] enter_nodes
  /// @param [in] stream_label
  /// @return Status
  ///
  static Status UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes, const std::string &stream_label);

  ///
  /// @brief Update stream_label start with enter nodes
  /// @return Status
  ///
  Status UpdateEnterNode();

  ///
  /// @brief Set stream_label for enter_nodes
  /// @param [in] enter_nodes
  /// @param [in] active_node
  /// @return Status
  ///
  static Status SetEnterLabel(const std::vector<NodePtr> &enter_nodes, const NodePtr &active_node);

  // StreamSwitch nodes collected by FindNodes.
  std::vector<NodePtr> stream_switch_nodes_;
  // Nodes that need a stream label (merge nodes first, then switch nodes).
  std::vector<NodePtr> need_label_nodes_;
  // Enter / RefEnter nodes collected by FindNodes.
  std::vector<NodePtr> enter_nodes_;
  // Maps each branch-head node to the StreamSwitch that owns it.
  std::unordered_map<NodePtr, NodePtr> branch_head_nodes_;
};
} // namespace ge | |||
#endif // GE_GRAPH_PASSES_ATTACH_STREAM_LABEL_PASS_H_ |
@@ -69,6 +69,7 @@ bool CastRemovePass::HasSameDataType(OpDescPtr &begin_op_desc, OpDescPtr &end_op | |||
auto begin_out_desc = begin_op_desc->MutableOutputDesc(0); | |||
DataType begin_out_datatype = begin_out_desc->GetDataType(); | |||
if (begin_out_datatype == end_out_datatype && (begin_out_datatype == DT_FLOAT16 || begin_out_datatype == DT_FLOAT)) { | |||
type = begin_out_datatype; | |||
return true; | |||
@@ -83,7 +83,6 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { | |||
continue; | |||
} | |||
auto key = GetCseKey(node); | |||
GELOGD("The node %s cse key %s", node->GetName().c_str(), key.c_str()); | |||
auto iter = keys_to_node.find(key); | |||
if (iter == keys_to_node.end()) { | |||
keys_to_node[key] = node; | |||
@@ -23,7 +23,6 @@ | |||
#include "common/ge_inner_error_codes.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/common/ge_call_wrapper.h" | |||
#include "graph/op_desc.h" | |||
using domi::ImplyType; | |||
@@ -79,7 +78,7 @@ graphStatus CompileNodesPass::Run(ComputeGraphPtr graph) { | |||
return result; | |||
} | |||
GELOGI("[CompileNodesPass]: Optimize success."); | |||
GE_TIMESTAMP_EVENT_END(CompileNodesPass, "OptimizeStage2::ControlAttrOptimize::CompileNodesPass"); | |||
GE_TIMESTAMP_END(CompileNodesPass, "GraphManager::CompileNodesPass"); | |||
return GRAPH_SUCCESS; | |||
} | |||
@@ -102,6 +101,7 @@ graphStatus CompileNodesPass::GetSupportedKernel(const NodePtr &node, const std: | |||
} | |||
} | |||
OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name); | |||
if (kernel_info == nullptr) { | |||
GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node->GetName().c_str()); | |||
return ge::GE_GRAPH_PARAM_NULLPTR; | |||
@@ -226,7 +226,7 @@ Status CondPass::HandleScalarCond(const ComputeGraphPtr &graph, const OutDataAnc | |||
return FAILED; | |||
} | |||
if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, cast_node) != GRAPH_SUCCESS) { | |||
if (GraphUtils::InsertNodeBefore(out_anchor, {in_anchor}, cast_node) != GRAPH_SUCCESS) { | |||
GELOGE(FAILED, "Insert Cast node %s between %s->%s failed.", cast_node->GetName().c_str(), | |||
out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); | |||
return FAILED; | |||
@@ -271,7 +271,7 @@ Status CondPass::InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr | |||
} | |||
AddRePassNode(new_node); | |||
if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, new_node) != GRAPH_SUCCESS) { | |||
if (GraphUtils::InsertNodeBefore(out_anchor, {in_anchor}, new_node) != GRAPH_SUCCESS) { | |||
GELOGE(FAILED, "Insert %s node %s between %s->%s failed.", type.c_str(), new_node->GetName().c_str(), | |||
out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); | |||
return FAILED; | |||