Browse Source

fixing cmake and code structures

tags/v0.7.0-beta
zhangzhenghai 4 years ago
parent
commit
1becd81e30
100 changed files with 1955 additions and 2737 deletions
  1. +0
    -2
      build.sh
  2. +0
    -11
      inc/common/opskernel/ge_task_info.h
  3. +0
    -1
      inc/common/util/compress/compress.h
  4. +0
    -33
      inc/common/util/compress/compress_weight.h
  5. +2
    -2
      inc/common/util/platform_info.h
  6. +0
    -8
      inc/external/ge/ge_api_types.h
  7. +1
    -2
      inc/external/graph/types.h
  8. +0
    -2
      inc/external/register/register.h
  9. +24
    -0
      inc/framework/common/debug/ge_log.h
  10. +36
    -19
      inc/framework/common/debug/log.h
  11. +3
    -5
      inc/framework/common/ge_types.h
  12. +2
    -0
      inc/framework/common/helper/model_helper.h
  13. +0
    -7
      inc/framework/common/types.h
  14. +1
    -1
      inc/framework/executor/ge_executor.h
  15. +0
    -1
      inc/framework/generator/ge_generator.h
  16. +1
    -0
      inc/framework/omg/omg.h
  17. +1
    -9
      inc/graph/compute_graph.h
  18. +1
    -20
      inc/graph/debug/ge_attr_define.h
  19. +1
    -0
      inc/graph/detail/attributes_holder.h
  20. +0
    -1
      inc/graph/ge_context.h
  21. +2
    -5
      inc/graph/ge_tensor.h
  22. +1
    -0
      inc/graph/model_serialize.h
  23. +0
    -4
      inc/graph/op_desc.h
  24. +2
    -5
      src/ge/CMakeLists.txt
  25. +43
    -14
      src/ge/client/ge_api.cc
  26. +1
    -21
      src/ge/engine_manager/dnnengine_manager.cc
  27. +0
    -3
      src/ge/engine_manager/dnnengine_manager.h
  28. +1
    -1
      src/ge/executor/CMakeLists.txt
  29. +1
    -0
      src/ge/executor/ge_executor.cc
  30. +1
    -2
      src/ge/executor/module.mk
  31. +4
    -8
      src/ge/ge_inference.mk
  32. +3
    -36
      src/ge/ge_runner.mk
  33. +333
    -0
      src/ge/ge_train.mk
  34. +1
    -134
      src/ge/generator/ge_generator.cc
  35. +0
    -48
      src/ge/graph/common/ge_call_wrapper.h
  36. +4
    -10
      src/ge/graph/execute/graph_execute.cc
  37. +2
    -2
      src/ge/graph/execute/graph_execute.h
  38. +1
    -2
      src/ge/graph/load/graph_loader.cc
  39. +1
    -1
      src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc
  40. +86
    -280
      src/ge/graph/load/new_model_manager/data_dumper.cc
  41. +1
    -33
      src/ge/graph/load/new_model_manager/data_dumper.h
  42. +363
    -541
      src/ge/graph/load/new_model_manager/davinci_model.cc
  43. +36
    -80
      src/ge/graph/load/new_model_manager/davinci_model.h
  44. +44
    -38
      src/ge/graph/load/new_model_manager/model_manager.cc
  45. +1
    -7
      src/ge/graph/load/new_model_manager/model_manager.h
  46. +128
    -109
      src/ge/graph/load/new_model_manager/model_utils.cc
  47. +18
    -19
      src/ge/graph/load/new_model_manager/model_utils.h
  48. +2
    -1
      src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
  49. +5
    -5
      src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h
  50. +36
    -89
      src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc
  51. +1
    -14
      src/ge/graph/load/new_model_manager/task_info/hccl_task_info.h
  52. +20
    -54
      src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  53. +0
    -2
      src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
  54. +34
    -103
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  55. +0
    -13
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  56. +2
    -31
      src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
  57. +2
    -5
      src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h
  58. +70
    -16
      src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
  59. +6
    -4
      src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
  60. +51
    -55
      src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
  61. +4
    -10
      src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
  62. +6
    -37
      src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc
  63. +0
    -5
      src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
  64. +13
    -33
      src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc
  65. +1
    -5
      src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h
  66. +12
    -3
      src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h
  67. +79
    -27
      src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
  68. +2
    -1
      src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h
  69. +0
    -2
      src/ge/graph/load/new_model_manager/task_info/task_info.h
  70. +1
    -1
      src/ge/graph/load/new_model_manager/task_info/task_info_factory.h
  71. +6
    -0
      src/ge/graph/load/new_model_manager/zero_copy_task.cc
  72. +175
    -0
      src/ge/graph/load/output/output.cc
  73. +94
    -0
      src/ge/graph/load/output/output.h
  74. +7
    -3
      src/ge/graph/manager/graph_caching_allocator.cc
  75. +5
    -7
      src/ge/graph/manager/graph_caching_allocator.h
  76. +36
    -57
      src/ge/graph/manager/graph_manager.cc
  77. +1
    -1
      src/ge/graph/manager/graph_manager.h
  78. +1
    -1
      src/ge/graph/manager/graph_mem_allocator.h
  79. +3
    -4
      src/ge/graph/manager/graph_var_manager.cc
  80. +1
    -1
      src/ge/graph/manager/graph_var_manager.h
  81. +2
    -2
      src/ge/graph/manager/model_manager/event_manager.h
  82. +8
    -10
      src/ge/graph/manager/trans_var_data_utils.cc
  83. +1
    -8
      src/ge/graph/manager/util/hcom_util.cc
  84. +2
    -2
      src/ge/graph/manager/util/hcom_util.h
  85. +5
    -22
      src/ge/graph/manager/util/rt_context_util.cc
  86. +3
    -10
      src/ge/graph/manager/util/rt_context_util.h
  87. +0
    -32
      src/ge/graph/optimize/graph_optimize.cc
  88. +1
    -4
      src/ge/graph/optimize/graph_optimize.h
  89. +1
    -2
      src/ge/graph/optimize/summary_optimize.cc
  90. +52
    -55
      src/ge/graph/partition/dynamic_shape_partition.cc
  91. +1
    -5
      src/ge/graph/partition/engine_place.cc
  92. +32
    -35
      src/ge/graph/partition/graph_partition.cc
  93. +15
    -11
      src/ge/graph/passes/atomic_addr_clean_pass.cc
  94. +0
    -1
      src/ge/graph/passes/atomic_addr_clean_pass.h
  95. +0
    -319
      src/ge/graph/passes/attach_stream_label_pass.cc
  96. +0
    -97
      src/ge/graph/passes/attach_stream_label_pass.h
  97. +1
    -0
      src/ge/graph/passes/cast_remove_pass.cc
  98. +0
    -1
      src/ge/graph/passes/common_subexpression_elimination_pass.cc
  99. +2
    -2
      src/ge/graph/passes/compile_nodes_pass.cc
  100. +2
    -2
      src/ge/graph/passes/cond_pass.cc

+ 0
- 2
build.sh View File

@@ -174,11 +174,9 @@ echo "---------------- GraphEngine output generated ----------------"
# generate output package in tar form, including ut/st libraries/executables # generate output package in tar form, including ut/st libraries/executables
cd ${BASEPATH} cd ${BASEPATH}
mkdir -p output/plugin/nnengine/ge_config/ mkdir -p output/plugin/nnengine/ge_config/
mkdir -p output/plugin/opskernel/
find output/ -name graphengine_lib.tar -exec rm {} \; find output/ -name graphengine_lib.tar -exec rm {} \;
cp src/ge/engine_manager/engine_conf.json output/plugin/nnengine/ge_config/ cp src/ge/engine_manager/engine_conf.json output/plugin/nnengine/ge_config/
find output/ -maxdepth 1 -name libengine.so -exec mv -f {} output/plugin/nnengine/ \; find output/ -maxdepth 1 -name libengine.so -exec mv -f {} output/plugin/nnengine/ \;
find output/ -maxdepth 1 -name libge_local_engine.so -exec mv -f {} output/plugin/opskernel/ \;
tar -cf graphengine_lib.tar output/* tar -cf graphengine_lib.tar output/*
mv -f graphengine_lib.tar output mv -f graphengine_lib.tar output
echo "---------------- GraphEngine package archive generated ----------------" echo "---------------- GraphEngine package archive generated ----------------"

+ 0
- 11
inc/common/opskernel/ge_task_info.h View File

@@ -52,16 +52,5 @@ struct GETaskInfo {


std::vector<GETaskKernelHcclInfo> kernelHcclInfo; std::vector<GETaskKernelHcclInfo> kernelHcclInfo;
}; };

struct HcomOpertion {
std::string hcclType;
void *inputPtr;
void *outputPtr;
uint64_t count;
int32_t dataType;
int32_t opType;
int32_t root;
};

} // namespace ge } // namespace ge
#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ #endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_

+ 0
- 1
inc/common/util/compress/compress.h View File

@@ -28,7 +28,6 @@ struct CompressConfig {
size_t channel; // channels of L2 or DDR. For load balance size_t channel; // channels of L2 or DDR. For load balance
size_t fractalSize; // size of compressing block size_t fractalSize; // size of compressing block
bool isTight; // whether compose compressed data tightly bool isTight; // whether compose compressed data tightly
size_t init_offset;
}; };


CmpStatus CompressWeights(char* input, const CompressConfig& compressConfig, char* indexs, char* output, CmpStatus CompressWeights(char* input, const CompressConfig& compressConfig, char* indexs, char* output,


+ 0
- 33
inc/common/util/compress/compress_weight.h View File

@@ -1,33 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef COMPRESS_WEIGHT_H
#define COMPRESS_WEIGHT_H

#include "compress.h"

const int SHAPE_SIZE_WEIGHT = 4;

struct CompressOpConfig {
int64_t wShape[SHAPE_SIZE_WEIGHT];
size_t compressTilingK;
size_t compressTilingN;
struct CompressConfig compressConfig;
};

extern "C" CmpStatus CompressWeightsConv2D(const char *const input, char *const zipBuffer, char *const infoBuffer,
CompressOpConfig *const param);
#endif // COMPRESS_WEIGHT_H

+ 2
- 2
inc/common/util/platform_info.h View File

@@ -27,6 +27,7 @@ using std::string;
using std::vector; using std::vector;


namespace fe { namespace fe {

class PlatformInfoManager { class PlatformInfoManager {
public: public:
PlatformInfoManager(const PlatformInfoManager &) = delete; PlatformInfoManager(const PlatformInfoManager &) = delete;
@@ -38,8 +39,6 @@ class PlatformInfoManager {


uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo); uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo);


uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo);

void SetOptionalCompilationInfo(OptionalInfo &optiCompilationInfo); void SetOptionalCompilationInfo(OptionalInfo &optiCompilationInfo);


private: private:
@@ -95,5 +94,6 @@ class PlatformInfoManager {
map<string, PlatformInfo> platformInfoMap_; map<string, PlatformInfo> platformInfoMap_;
OptionalInfo optiCompilationInfo_; OptionalInfo optiCompilationInfo_;
}; };

} // namespace fe } // namespace fe
#endif #endif

+ 0
- 8
inc/external/ge/ge_api_types.h View File

@@ -44,12 +44,8 @@ const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump";
const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath";
const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep";
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode";
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug";
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode";
const char *const OPTION_EXEC_OP_DEBUG_LEVEL = "ge.exec.opDebugLevel";
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild";
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath";
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses";
// profiling flag // profiling flag
const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode";
const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions";
@@ -223,10 +219,6 @@ const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream";
// Configure input fp16 nodes // Configure input fp16 nodes
const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";


// Configure debug level, its value should be 0(default), 1 or 2.
// 0: close debug; 1: open TBE compiler; 2: open ccec compiler
const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel";

// Graph run mode // Graph run mode
enum GraphRunMode { PREDICTION = 0, TRAIN }; enum GraphRunMode { PREDICTION = 0, TRAIN };




+ 1
- 2
inc/external/graph/types.h View File

@@ -145,8 +145,7 @@ enum Format {
FORMAT_FRACTAL_ZN_LSTM, FORMAT_FRACTAL_ZN_LSTM,
FORMAT_FRACTAL_Z_G, FORMAT_FRACTAL_Z_G,
FORMAT_RESERVED, FORMAT_RESERVED,
FORMAT_ALL,
FORMAT_NULL
FORMAT_ALL
}; };


// for unknown shape op type // for unknown shape op type


+ 0
- 2
inc/external/register/register.h View File

@@ -98,8 +98,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData {


OpRegistrationData &DelInputWithOriginalType(int input_idx, const std::string &ori_type); OpRegistrationData &DelInputWithOriginalType(int input_idx, const std::string &ori_type);


OpRegistrationData &InputReorderVector(const vector<int> &input_order);

domi::ImplyType GetImplyType() const; domi::ImplyType GetImplyType() const;
std::string GetOmOptype() const; std::string GetOmOptype() const;
std::set<std::string> GetOriginOpTypeSet() const; std::set<std::string> GetOriginOpTypeSet() const;


+ 24
- 0
inc/framework/common/debug/ge_log.h View File

@@ -51,6 +51,30 @@ inline pid_t GetTid() {
return tid; return tid;
} }


#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap()

#define GE_TIMESTAMP_END(stage, stage_name) \
do { \
uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \
(endUsec_##stage - startUsec_##stage)); \
} while (0);

#define GE_TIMESTAMP_CALLNUM_START(stage) \
uint64_t startUsec_##stage = ge::GetCurrentTimestap(); \
uint64_t call_num_of##stage = 0; \
uint64_t time_of##stage = 0

#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestap())

#define GE_TIMESTAMP_ADD(stage) \
time_of##stage += ge::GetCurrentTimestap() - startUsec_##stage; \
call_num_of##stage++

#define GE_TIMESTAMP_CALLNUM_END(stage, stage_name) \
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \
call_num_of##stage)

#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ #define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GetTid(), __FUNCTION__, ERROR_CODE, \ dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GetTid(), __FUNCTION__, ERROR_CODE, \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__)


+ 36
- 19
inc/framework/common/debug/log.h View File

@@ -19,12 +19,15 @@


#include <string> #include <string>


#include "runtime/rt.h"
#include "cce/cce_def.hpp"
#include "common/string_util.h" #include "common/string_util.h"
#include "common/util.h" #include "common/util.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
#include "ge/ge_api_error_codes.h" #include "ge/ge_api_error_codes.h"


using cce::CC_STATUS_SUCCESS;
using cce::ccStatus_t;

#if !defined(__ANDROID__) && !defined(ANDROID) #if !defined(__ANDROID__) && !defined(ANDROID)
#define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__) #define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__)
#else #else
@@ -99,13 +102,17 @@
} while (0); } while (0);


// If expr is not true, print the log and return the specified status // If expr is not true, print the log and return the specified status
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \
do { \
bool b = (expr); \
if (!b) { \
GELOGE(_status, __VA_ARGS__); \
return _status; \
} \
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \
do { \
bool b = (expr); \
if (!b) { \
std::string msg; \
(void)msg.append(ge::StringUtils::FormatString(__VA_ARGS__)); \
(void)msg.append( \
ge::StringUtils::FormatString(" Error Code:0x%X(%s)", _status, GET_ERRORNO_STR(_status).c_str())); \
DOMI_LOGE("%s", msg.c_str()); \
return _status; \
} \
} while (0); } while (0);


// If expr is not true, print the log and return the specified status // If expr is not true, print the log and return the specified status
@@ -125,7 +132,7 @@
DOMI_LOGE(__VA_ARGS__); \ DOMI_LOGE(__VA_ARGS__); \
exec_expr; \ exec_expr; \
} \ } \
}
};


// If expr is not true, print the log and execute a custom statement // If expr is not true, print the log and execute a custom statement
#define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \ #define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \
@@ -135,7 +142,7 @@
GELOGW(__VA_ARGS__); \ GELOGW(__VA_ARGS__); \
exec_expr; \ exec_expr; \
} \ } \
}
};
// If expr is not true, print the log and execute a custom statement // If expr is not true, print the log and execute a custom statement
#define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \ #define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \
{ \ { \
@@ -144,7 +151,7 @@
GELOGI(__VA_ARGS__); \ GELOGI(__VA_ARGS__); \
exec_expr; \ exec_expr; \
} \ } \
}
};


// If expr is not true, print the log and execute a custom statement // If expr is not true, print the log and execute a custom statement
#define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ #define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \
@@ -154,7 +161,7 @@
GELOGI(__VA_ARGS__); \ GELOGI(__VA_ARGS__); \
exec_expr; \ exec_expr; \
} \ } \
}
};


// If expr is true, print logs and execute custom statements // If expr is true, print logs and execute custom statements
#define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \ #define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \
@@ -164,7 +171,7 @@
DOMI_LOGE(__VA_ARGS__); \ DOMI_LOGE(__VA_ARGS__); \
exec_expr; \ exec_expr; \
} \ } \
}
};
// If expr is true, print the Information log and execute a custom statement // If expr is true, print the Information log and execute a custom statement
#define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \ #define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \
{ \ { \
@@ -173,7 +180,7 @@
GELOGI(__VA_ARGS__); \ GELOGI(__VA_ARGS__); \
exec_expr; \ exec_expr; \
} \ } \
}
};


// If expr is not SUCCESS, print the log and execute the expression + return // If expr is not SUCCESS, print the log and execute the expression + return
#define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ #define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \
@@ -184,7 +191,7 @@
exec_expr; \ exec_expr; \
return; \ return; \
} \ } \
}
};


// If expr is not SUCCESS, print the log and execute the expression + return _status // If expr is not SUCCESS, print the log and execute the expression + return _status
#define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \ #define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \
@@ -195,7 +202,7 @@
exec_expr; \ exec_expr; \
return _status; \ return _status; \
} \ } \
}
};


// If expr is not true, execute a custom statement // If expr is not true, execute a custom statement
#define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ #define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \
@@ -204,7 +211,7 @@
if (!b) { \ if (!b) { \
exec_expr; \ exec_expr; \
} \ } \
}
};


// -----------------runtime related macro definitions------------------------------- // -----------------runtime related macro definitions-------------------------------
// If expr is not RT_ERROR_NONE, print the log // If expr is not RT_ERROR_NONE, print the log
@@ -224,7 +231,7 @@
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
exec_expr; \ exec_expr; \
} \ } \
}
};


// If expr is not RT_ERROR_NONE, print the log and return // If expr is not RT_ERROR_NONE, print the log and return
#define GE_CHK_RT_RET(expr) \ #define GE_CHK_RT_RET(expr) \
@@ -236,13 +243,23 @@
} \ } \
} while (0); } while (0);


// ------------------------cce related macro definitions----------------------------
// If expr is not CC_STATUS_SUCCESS, print the log
#define GE_CHK_CCE(expr) \
do { \
ccStatus_t _cc_ret = (expr); \
if (_cc_ret != CC_STATUS_SUCCESS) { \
DOMI_LOGE("Call cce api failed, ret: 0x%X", _cc_ret); \
} \
} while (0);

// If expr is true, execute exec_expr without printing logs // If expr is true, execute exec_expr without printing logs
#define GE_IF_BOOL_EXEC(expr, exec_expr) \ #define GE_IF_BOOL_EXEC(expr, exec_expr) \
{ \ { \
if (expr) { \ if (expr) { \
exec_expr; \ exec_expr; \
} \ } \
}
};


// If make_shared is abnormal, print the log and execute the statement // If make_shared is abnormal, print the log and execute the statement
#define GE_MAKE_SHARED(exec_expr0, exec_expr1) \ #define GE_MAKE_SHARED(exec_expr0, exec_expr1) \


+ 3
- 5
inc/framework/common/ge_types.h View File

@@ -54,9 +54,9 @@ const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
struct DataBuffer { struct DataBuffer {
public: public:
void *data; // Data address void *data; // Data address
uint64_t length; // Data length
uint32_t length; // Data length
bool isDataSupportMemShare = false; bool isDataSupportMemShare = false;
DataBuffer(void *dataIn, uint64_t len, bool isSupportMemShare)
DataBuffer(void *dataIn, uint32_t len, bool isSupportMemShare)
: data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare) {} : data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare) {}


DataBuffer() : data(nullptr), length(0), isDataSupportMemShare(false) {} DataBuffer() : data(nullptr), length(0), isDataSupportMemShare(false) {}
@@ -106,7 +106,7 @@ struct ShapeDescription {
// Definition of input and output description information // Definition of input and output description information
struct InputOutputDescInfo { struct InputOutputDescInfo {
std::string name; std::string name;
uint64_t size;
uint32_t size;
uint32_t data_type; uint32_t data_type;
ShapeDescription shape_info; ShapeDescription shape_info;
}; };
@@ -231,7 +231,6 @@ struct Options {


// Profiling info of task // Profiling info of task
struct TaskDescInfo { struct TaskDescInfo {
std::string model_name;
std::string op_name; std::string op_name;
uint32_t block_dim; uint32_t block_dim;
uint32_t task_id; uint32_t task_id;
@@ -240,7 +239,6 @@ struct TaskDescInfo {


// Profiling info of graph // Profiling info of graph
struct ComputeGraphDescInfo { struct ComputeGraphDescInfo {
std::string model_name;
std::string op_name; std::string op_name;
std::string op_type; std::string op_type;
std::vector<Format> input_format; std::vector<Format> input_format;


+ 2
- 0
inc/framework/common/helper/model_helper.h View File

@@ -44,6 +44,8 @@ class ModelHelper {
void SetSaveMode(bool val) { is_offline_ = val; } void SetSaveMode(bool val) { is_offline_ = val; }
bool GetSaveMode(void) const { return is_offline_; } bool GetSaveMode(void) const { return is_offline_; }


static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model);
static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr);
Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name); Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name);
Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name); Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name);




+ 0
- 7
inc/framework/common/types.h View File

@@ -48,9 +48,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_S
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_LAYER; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_LAYER;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_FILE_PATH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_FILE_PATH;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_MODE; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_MODE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_AICORE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ATOMIC;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL;


// Supported public properties name // Supported public properties name
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_START_TIME; // Start time FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_START_TIME; // Start time
@@ -338,7 +335,6 @@ REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell");
REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext");
REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); REGISTER_OPTYPE_DECLARE(INITDATA, "InitData");
REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape")
REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity");


// ANN dedicated operator // ANN dedicated operator
REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean");
@@ -635,9 +631,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_N


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_END_GRAPH; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_END_GRAPH;


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_OP_DEBUG;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_OP_DEBUG;

// convolution node type // convolution node type
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_CONVOLUTION; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYPE_CONVOLUTION;
// adds a convolutional node name for the hard AIPP // adds a convolutional node name for the hard AIPP


+ 1
- 1
inc/framework/executor/ge_executor.h View File

@@ -21,12 +21,12 @@
#include <string> #include <string>
#include <vector> #include <vector>


#include "common/dynamic_aipp.h"
#include "common/ge_inner_error_codes.h" #include "common/ge_inner_error_codes.h"
#include "common/ge_types.h" #include "common/ge_types.h"
#include "common/types.h" #include "common/types.h"
#include "graph/tensor.h" #include "graph/tensor.h"
#include "runtime/base.h" #include "runtime/base.h"
#include "common/dynamic_aipp.h"


namespace ge { namespace ge {
class ModelListenerAdapter; class ModelListenerAdapter;


+ 0
- 1
inc/framework/generator/ge_generator.h View File

@@ -27,7 +27,6 @@
#include "graph/ge_tensor.h" #include "graph/ge_tensor.h"
#include "graph/graph.h" #include "graph/graph.h"
#include "graph/op_desc.h" #include "graph/op_desc.h"
#include "graph/detail/attributes_holder.h"


namespace ge { namespace ge {
class GeGenerator { class GeGenerator {


+ 1
- 0
inc/framework/omg/omg.h View File

@@ -106,6 +106,7 @@ void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &ou
void UpdateOmgCtxWithParserCtx(); void UpdateOmgCtxWithParserCtx();


void UpdateParserCtxWithOmgCtx(); void UpdateParserCtxWithOmgCtx();

} // namespace ge } // namespace ge


namespace domi { namespace domi {


+ 1
- 9
inc/graph/compute_graph.h View File

@@ -74,9 +74,6 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A


size_t GetAllNodesSize() const; size_t GetAllNodesSize() const;
Vistor<NodePtr> GetAllNodes() const; Vistor<NodePtr> GetAllNodes() const;
// is_unknown_shape: false, same with GetAllNodes func
// is_unknown_shape: true, same with GetDirectNodes func
Vistor<NodePtr> GetNodes(bool is_unknown_shape) const;
size_t GetDirectNodesSize() const; size_t GetDirectNodesSize() const;
Vistor<NodePtr> GetDirectNode() const; Vistor<NodePtr> GetDirectNode() const;
Vistor<NodePtr> GetInputNodes() const; Vistor<NodePtr> GetInputNodes() const;
@@ -177,10 +174,6 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
void SetInputSize(uint32_t size) { input_size_ = size; } void SetInputSize(uint32_t size) { input_size_ = size; }
uint32_t GetInputSize() const { return input_size_; } uint32_t GetInputSize() const { return input_size_; }


// false: known shape true: unknow shape
bool GetGraphUnknownFlag() const { return is_unknown_shape_graph_; }
void SetGraphUnknownFlag(bool flag) { is_unknown_shape_graph_ = flag; }

/// ///
/// Set is need train iteration. /// Set is need train iteration.
/// If set true, it means this graph need to be run iteration some /// If set true, it means this graph need to be run iteration some
@@ -289,8 +282,7 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
std::map<uint32_t, std::string> op_name_map_; std::map<uint32_t, std::string> op_name_map_;
uint64_t session_id_ = 0; uint64_t session_id_ = 0;
ge::Format data_format_ = ge::FORMAT_ND; ge::Format data_format_ = ge::FORMAT_ND;
// unknown graph indicator, default is false, mean known shape
bool is_unknown_shape_graph_ = false;
}; };
} // namespace ge } // namespace ge

#endif // INC_GRAPH_COMPUTE_GRAPH_H_ #endif // INC_GRAPH_COMPUTE_GRAPH_H_

+ 1
- 20
inc/graph/debug/ge_attr_define.h View File

@@ -778,10 +778,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MOD


GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_CORE_TYPE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_CORE_TYPE;


GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_ATC_VERSION;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_OPP_VERSION;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_MODE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_MODE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_VALUE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_VALUE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_OFFSET; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string QUANTIZE_SCALE_OFFSET;
@@ -1000,7 +996,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_FORMAT; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_FORMAT;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_DATA_TYPE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_DUMP_ORIGIN_DATA_TYPE;


// used for lX fusion
// used for l1 fusion and other fusion in future
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_ID; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_ID;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_KEY; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L1_FUSION_GROUP_KEY;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_GROUP_KEY; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSION_GROUP_KEY;
@@ -1014,17 +1010,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L1_FUSION; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L1_FUSION;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_N_BATCH_SPILT; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_N_BATCH_SPILT;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NO_TASK_AND_DUMP_NEEDED; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NO_TASK_AND_DUMP_NEEDED;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_DUMP_REF;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L2_FUSION_GROUP_ID; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_L2_FUSION_GROUP_ID;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L2_FUSION; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_FOR_L2_FUSION;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_FLAG;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE;

// op overflow dump
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE;


// functional ops attr // functional ops attr
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH;
@@ -1070,13 +1058,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_HOR
// for gradient group // for gradient group
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_GROUP; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_GROUP;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_FLAG; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_HCCL_FUSED_FLAG;

// dynamic shape attrs
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX;

// for fusion op plugin
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE;
} // namespace ge } // namespace ge


#endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ #endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_

+ 1
- 0
inc/graph/detail/attributes_holder.h View File

@@ -149,4 +149,5 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AttrHolder {
AnyMap extAttrs_; AnyMap extAttrs_;
}; };
} // namespace ge } // namespace ge

#endif // INC_GRAPH_DETAIL_ATTRIBUTES_HOLDER_H_ #endif // INC_GRAPH_DETAIL_ATTRIBUTES_HOLDER_H_

+ 0
- 1
inc/graph/ge_context.h View File

@@ -28,7 +28,6 @@ class GEContext {
uint32_t DeviceId(); uint32_t DeviceId();
uint64_t TraceId(); uint64_t TraceId();
void Init(); void Init();
void SetSessionId(uint64_t session_id);
void SetCtxDeviceId(uint32_t device_id); void SetCtxDeviceId(uint32_t device_id);


private: private:


+ 2
- 5
inc/graph/ge_tensor.h View File

@@ -25,7 +25,6 @@
#include "graph/buffer.h" #include "graph/buffer.h"
#include "graph/ge_error_codes.h" #include "graph/ge_error_codes.h"
#include "graph/types.h" #include "graph/types.h"

namespace ge { namespace ge {
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeShape { class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeShape {
public: public:
@@ -109,11 +108,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc : public AttrH
DataType GetDataType() const; DataType GetDataType() const;
void SetDataType(DataType dt); void SetDataType(DataType dt);


DataType GetOriginDataType() const;
void SetOriginDataType(DataType originDataType); void SetOriginDataType(DataType originDataType);

std::vector<uint32_t> GetRefPortIndex() const;
void SetRefPortByIndex(const std::vector<uint32_t> &index);
DataType GetOriginDataType() const;


GeTensorDesc Clone() const; GeTensorDesc Clone() const;
GeTensorDesc &operator=(const GeTensorDesc &desc); GeTensorDesc &operator=(const GeTensorDesc &desc);
@@ -190,4 +186,5 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensor {
GeTensorDesc &DescReference() const; GeTensorDesc &DescReference() const;
}; };
} // namespace ge } // namespace ge

#endif // INC_GRAPH_GE_TENSOR_H_ #endif // INC_GRAPH_GE_TENSOR_H_

+ 1
- 0
inc/graph/model_serialize.h View File

@@ -49,4 +49,5 @@ class ModelSerialize {
friend class GraphDebugImp; friend class GraphDebugImp;
}; };
} // namespace ge } // namespace ge

#endif // INC_GRAPH_MODEL_SERIALIZE_H_ #endif // INC_GRAPH_MODEL_SERIALIZE_H_

+ 0
- 4
inc/graph/op_desc.h View File

@@ -105,8 +105,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {


GeTensorDescPtr MutableInputDesc(uint32_t index) const; GeTensorDescPtr MutableInputDesc(uint32_t index) const;


GeTensorDescPtr MutableInputDesc(const string &name) const;

Vistor<GeTensorDesc> GetAllInputsDesc() const; Vistor<GeTensorDesc> GetAllInputsDesc() const;


Vistor<GeTensorDescPtr> GetAllInputsDescPtr() const; Vistor<GeTensorDescPtr> GetAllInputsDescPtr() const;
@@ -129,8 +127,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {


GeTensorDescPtr MutableOutputDesc(uint32_t index) const; GeTensorDescPtr MutableOutputDesc(uint32_t index) const;


GeTensorDescPtr MutableOutputDesc(const string &name) const;

uint32_t GetAllOutputsDescSize() const; uint32_t GetAllOutputsDescSize() const;


Vistor<GeTensorDesc> GetAllOutputsDesc() const; Vistor<GeTensorDesc> GetAllOutputsDesc() const;


+ 2
- 5
src/ge/CMakeLists.txt View File

@@ -60,7 +60,6 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"common/formats/formats.cc" "common/formats/formats.cc"
"common/formats/utils/formats_trans_utils.cc" "common/formats/utils/formats_trans_utils.cc"
"common/fp16_t.cc" "common/fp16_t.cc"
"common/ge/op_tiling_manager.cc"
"common/ge/plugin_manager.cc" "common/ge/plugin_manager.cc"
"common/helper/model_cache_helper.cc" "common/helper/model_cache_helper.cc"
"common/profiling/profiling_manager.cc" "common/profiling/profiling_manager.cc"
@@ -95,6 +94,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/task_info.cc" "graph/load/new_model_manager/task_info/task_info.cc"
"graph/load/output/output.cc"
"graph/manager/*.cc" "graph/manager/*.cc"
"graph/manager/model_manager/event_manager.cc" "graph/manager/model_manager/event_manager.cc"
"graph/manager/util/debug.cc" "graph/manager/util/debug.cc"
@@ -159,11 +159,8 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"hybrid/node_executor/aicpu/aicpu_ext_info.cc" "hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"hybrid/node_executor/aicpu/aicpu_node_executor.cc" "hybrid/node_executor/aicpu/aicpu_node_executor.cc"
"hybrid/node_executor/compiledsubgraph/known_node_executor.cc" "hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"hybrid/node_executor/controlop/control_op_executor.cc"
"hybrid/node_executor/hccl/hccl_node_executor.cc"
"hybrid/node_executor/hostcpu/ge_local_node_executor.cc" "hybrid/node_executor/hostcpu/ge_local_node_executor.cc"
"hybrid/node_executor/node_executor.cc" "hybrid/node_executor/node_executor.cc"
"hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"hybrid/node_executor/task_context.cc" "hybrid/node_executor/task_context.cc"
"init/gelib.cc" "init/gelib.cc"
"model/ge_model.cc" "model/ge_model.cc"
@@ -207,7 +204,6 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"common/formats/formats.cc" "common/formats/formats.cc"
"common/formats/utils/formats_trans_utils.cc" "common/formats/utils/formats_trans_utils.cc"
"common/fp16_t.cc" "common/fp16_t.cc"
"common/ge/op_tiling_manager.cc"
"common/ge/plugin_manager.cc" "common/ge/plugin_manager.cc"
"common/helper/model_cache_helper.cc" "common/helper/model_cache_helper.cc"
"common/profiling/profiling_manager.cc" "common/profiling/profiling_manager.cc"
@@ -240,6 +236,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/task_info.cc" "graph/load/new_model_manager/task_info/task_info.cc"
"graph/load/output/output.cc"
"graph/manager/*.cc" "graph/manager/*.cc"
"graph/manager/model_manager/event_manager.cc" "graph/manager/model_manager/event_manager.cc"
"graph/manager/util/debug.cc" "graph/manager/util/debug.cc"


+ 43
- 14
src/ge/client/ge_api.cc View File

@@ -28,7 +28,6 @@
#include "graph/opsproto_manager.h" #include "graph/opsproto_manager.h"
#include "graph/utils/type_utils.h" #include "graph/utils/type_utils.h"
#include "graph/manager/util/rt_context_util.h" #include "graph/manager/util/rt_context_util.h"
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h" #include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h" #include "common/ge/tbe_plugin_manager.h"


@@ -42,8 +41,8 @@ namespace {
const int32_t kMaxStrLen = 128; const int32_t kMaxStrLen = 128;
} }


static bool g_ge_initialized = false;
static std::mutex g_ge_release_mutex; // GEFinalize and ~Session use
static bool kGeInitialized = false;
static std::mutex kGeReleaseMutex; // GEFinalize and ~Session use


namespace ge { namespace ge {
void GetOpsProtoPath(std::string &opsproto_path) { void GetOpsProtoPath(std::string &opsproto_path) {
@@ -62,6 +61,31 @@ void GetOpsProtoPath(std::string &opsproto_path) {
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
} }


Status CheckDumpAndReuseMemory(const std::map<string, string> &options) {
const int kDecimal = 10;
auto dump_op_env = std::getenv("DUMP_OP");
int dump_op_flag = (dump_op_env != nullptr) ? std::strtol(dump_op_env, nullptr, kDecimal) : 0;
auto disableReuseMemoryIter = options.find("ge.exec.disableReuseMemory");
if (disableReuseMemoryIter != options.end()) {
if (disableReuseMemoryIter->second == "0") {
GELOGD("ge.exec.disableReuseMemory=0, reuse memory is open");
if (dump_op_flag) {
GELOGW("Will dump incorrect op data with GE Option ge.exec.disableReuseMemory=0");
}
} else if (disableReuseMemoryIter->second == "1") {
GELOGD("ge.exec.disableReuseMemory=1, reuse memory is close");
} else {
GELOGE(PARAM_INVALID, "CheckDumpAndReuseMemory ge.exec.disableReuseMemory is valid");
return FAILED;
}
} else {
if (dump_op_flag) {
GELOGW("Will dump incorrect op data with default reuse memory");
}
}
return SUCCESS;
}

Status CheckOptionsValid(const std::map<string, string> &options) { Status CheckOptionsValid(const std::map<string, string> &options) {
// check job_id is valid // check job_id is valid
auto job_id_iter = options.find(OPTION_EXEC_JOB_ID); auto job_id_iter = options.find(OPTION_EXEC_JOB_ID);
@@ -72,6 +96,11 @@ Status CheckOptionsValid(const std::map<string, string> &options) {
} }
} }


// Check ge.exec.disableReuseMemory and env DUMP_OP
if (CheckDumpAndReuseMemory(options) != SUCCESS) {
return FAILED;
}

return SUCCESS; return SUCCESS;
} }


@@ -79,7 +108,7 @@ Status CheckOptionsValid(const std::map<string, string> &options) {
Status GEInitialize(const std::map<string, string> &options) { Status GEInitialize(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "GEInitialize start"); GELOGT(TRACE_INIT, "GEInitialize start");
// 0.check init status // 0.check init status
if (g_ge_initialized) {
if (kGeInitialized) {
GELOGW("GEInitialize is called more than once"); GELOGW("GEInitialize is called more than once");
return SUCCESS; return SUCCESS;
} }
@@ -118,9 +147,9 @@ Status GEInitialize(const std::map<string, string> &options) {
} }


// 7.check return status, return // 7.check return status, return
if (!g_ge_initialized) {
if (!kGeInitialized) {
// Initialize success, first time calling initialize // Initialize success, first time calling initialize
g_ge_initialized = true;
kGeInitialized = true;
} }


GELOGT(TRACE_STOP, "GEInitialize finished"); GELOGT(TRACE_STOP, "GEInitialize finished");
@@ -131,12 +160,12 @@ Status GEInitialize(const std::map<string, string> &options) {
Status GEFinalize() { Status GEFinalize() {
GELOGT(TRACE_INIT, "GEFinalize start"); GELOGT(TRACE_INIT, "GEFinalize start");
// check init status // check init status
if (!g_ge_initialized) {
if (!kGeInitialized) {
GELOGW("GEFinalize is called before GEInitialize"); GELOGW("GEFinalize is called before GEInitialize");
return SUCCESS; return SUCCESS;
} }


std::lock_guard<std::mutex> lock(g_ge_release_mutex);
std::lock_guard<std::mutex> lock(kGeReleaseMutex);
// call Finalize // call Finalize
Status ret = SUCCESS; Status ret = SUCCESS;
Status middle_ret; Status middle_ret;
@@ -158,10 +187,10 @@ Status GEFinalize() {
ret = middle_ret; ret = middle_ret;
} }


if (g_ge_initialized && ret == SUCCESS) {
if (kGeInitialized && ret == SUCCESS) {
// Unified destruct rt_context // Unified destruct rt_context
RtContextUtil::GetInstance().DestroyAllRtContexts();
g_ge_initialized = false;
RtContextUtil::GetInstance().DestroyrtContexts();
kGeInitialized = false;
} }


GELOGT(TRACE_STOP, "GEFinalize finished"); GELOGT(TRACE_STOP, "GEFinalize finished");
@@ -173,7 +202,7 @@ Session::Session(const std::map<string, string> &options) {
GELOGT(TRACE_INIT, "Session Constructor start"); GELOGT(TRACE_INIT, "Session Constructor start");
// check init status // check init status
sessionId_ = 0; sessionId_ = 0;
if (!g_ge_initialized) {
if (!kGeInitialized) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED); GELOGE(GE_CLI_GE_NOT_INITIALIZED);
return; return;
} }
@@ -203,13 +232,13 @@ Session::Session(const std::map<string, string> &options) {
Session::~Session() { Session::~Session() {
GELOGT(TRACE_INIT, "Session Destructor start"); GELOGT(TRACE_INIT, "Session Destructor start");
// 0.check init status // 0.check init status
if (!g_ge_initialized) {
if (!kGeInitialized) {
GELOGW("GE is not yet initialized or is finalized."); GELOGW("GE is not yet initialized or is finalized.");
return; return;
} }


Status ret = FAILED; Status ret = FAILED;
std::lock_guard<std::mutex> lock(g_ge_release_mutex);
std::lock_guard<std::mutex> lock(kGeReleaseMutex);
try { try {
uint64_t session_id = sessionId_; uint64_t session_id = sessionId_;
// call DestroySession // call DestroySession


+ 1
- 21
src/ge/engine_manager/dnnengine_manager.cc View File

@@ -24,7 +24,6 @@


#include "common/debug/log.h" #include "common/debug/log.h"
#include "common/ge/ge_util.h" #include "common/ge/ge_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
#include "graph/ge_context.h" #include "graph/ge_context.h"
#include "init/gelib.h" #include "init/gelib.h"
@@ -162,10 +161,6 @@ bool DNNEngineManager::IsEngineRegistered(const std::string &name) {
return false; return false;
} }


void DNNEngineManager::InitPerformanceStaistic() { checksupport_cost_.clear(); }

const map<string, uint64_t> &DNNEngineManager::GetCheckSupportCost() const { return checksupport_cost_; }

std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) {
GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: op_desc is nullptr"); GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: op_desc is nullptr");
return ""); return "");
@@ -199,20 +194,15 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) {
if (kernel_info_store != kernel_map.end()) { if (kernel_info_store != kernel_map.end()) {
std::string unsupported_reason; std::string unsupported_reason;
// It will be replaced by engine' checksupport // It will be replaced by engine' checksupport
uint64_t start_time = GetCurrentTimestap();
if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) { if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) {
checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time;
op_desc->SetOpEngineName(it.engine); op_desc->SetOpEngineName(it.engine);
op_desc->SetOpKernelLibName(kernel_name); op_desc->SetOpKernelLibName(kernel_name);
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(), GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(),
it.engine.c_str(), op_desc->GetName().c_str()); it.engine.c_str(), op_desc->GetName().c_str());
return it.engine; return it.engine;
} else { } else {
checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time;
bool is_custom_op = false; bool is_custom_op = false;
if ((ge::AttrUtils::GetBool(op_desc, kCustomOpFlag, is_custom_op)) && is_custom_op) { if ((ge::AttrUtils::GetBool(op_desc, kCustomOpFlag, is_custom_op)) && is_custom_op) {
ErrorManager::GetInstance().ATCReportErrMessage("E13001", {"kernelname", "optype", "opname"},
{kernel_name, op_desc->GetType(), op_desc->GetName()});
GELOGE(FAILED, GELOGE(FAILED,
"The custom operator registered by the user does not support the logic function delivered by this " "The custom operator registered by the user does not support the logic function delivered by this "
"network. Check support failed, kernel_name is %s, op type is %s, op name is %s", "network. Check support failed, kernel_name is %s, op type is %s, op name is %s",
@@ -231,13 +221,9 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) {
} }
} }
for (const auto &it : unsupported_reasons) { for (const auto &it : unsupported_reasons) {
ErrorManager::GetInstance().ATCReportErrMessage("E13002", {"optype", "opskernel", "reason"},
{op_desc->GetType(), it.first, it.second});
GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "GetDNNEngineName:Op type %s of ops kernel %s is unsupported, reason:%s", GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "GetDNNEngineName:Op type %s of ops kernel %s is unsupported, reason:%s",
op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str()); op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str());
} }
ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"},
{op_desc->GetName(), op_desc->GetType()});
GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "Can't find any supported ops kernel and engine of %s, type is %s", GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "Can't find any supported ops kernel and engine of %s, type is %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str()); op_desc->GetName().c_str(), op_desc->GetType().c_str());
return ""; return "";
@@ -398,13 +384,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h
return FAILED; return FAILED;
} }


try {
ifs >> *json_file;
} catch (const json::exception &e) {
GELOGE(FAILED, "Read json file failed");
ifs.close();
return FAILED;
}
ifs >> *json_file;
ifs.close(); ifs.close();
GELOGI("Read json file success"); GELOGI("Read json file success");
return SUCCESS; return SUCCESS;


+ 0
- 3
src/ge/engine_manager/dnnengine_manager.h View File

@@ -63,8 +63,6 @@ class DNNEngineManager {
// If can't find appropriate engine name, return "", report error // If can't find appropriate engine name, return "", report error
string GetDNNEngineName(const OpDescPtr &op_desc); string GetDNNEngineName(const OpDescPtr &op_desc);
const map<string, SchedulerConf> &GetSchedulers() const; const map<string, SchedulerConf> &GetSchedulers() const;
const map<string, uint64_t> &GetCheckSupportCost() const;
void InitPerformanceStaistic();


private: private:
DNNEngineManager(); DNNEngineManager();
@@ -80,7 +78,6 @@ class DNNEngineManager {
std::map<std::string, DNNEnginePtr> engines_map_; std::map<std::string, DNNEnginePtr> engines_map_;
std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_; std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_;
std::map<string, SchedulerConf> schedulers_; std::map<string, SchedulerConf> schedulers_;
std::map<string, uint64_t> checksupport_cost_;
bool init_flag_; bool init_flag_;
}; };
} // namespace ge } // namespace ge


+ 1
- 1
src/ge/executor/CMakeLists.txt View File

@@ -26,7 +26,6 @@ file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}


file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"ge_executor.cc" "ge_executor.cc"
"../common/ge/op_tiling_manager.cc"
"../common/ge/plugin_manager.cc" "../common/ge/plugin_manager.cc"
"../common/profiling/profiling_manager.cc" "../common/profiling/profiling_manager.cc"
"../graph/execute/graph_execute.cc" "../graph/execute/graph_execute.cc"
@@ -60,6 +59,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"../graph/load/new_model_manager/task_info/task_info.cc" "../graph/load/new_model_manager/task_info/task_info.cc"
"../graph/load/new_model_manager/tbe_handle_store.cc" "../graph/load/new_model_manager/tbe_handle_store.cc"
"../graph/load/new_model_manager/zero_copy_task.cc" "../graph/load/new_model_manager/zero_copy_task.cc"
"../graph/load/output/output.cc"
"../graph/manager/graph_caching_allocator.cc" "../graph/manager/graph_caching_allocator.cc"
"../graph/manager/graph_manager_utils.cc" "../graph/manager/graph_manager_utils.cc"
"../graph/manager/graph_mem_allocator.cc" "../graph/manager/graph_mem_allocator.cc"


+ 1
- 0
src/ge/executor/ge_executor.cc View File

@@ -854,4 +854,5 @@ Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
GELOGI("GetAllAippInputOutputDims succ."); GELOGI("GetAllAippInputOutputDims succ.");
return SUCCESS; return SUCCESS;
} }

} // namespace ge } // namespace ge

+ 1
- 2
src/ge/executor/module.mk View File

@@ -4,7 +4,6 @@ local_ge_executor_src_files := \
ge_executor.cc \ ge_executor.cc \
../common/profiling/profiling_manager.cc \ ../common/profiling/profiling_manager.cc \
../common/ge/plugin_manager.cc \ ../common/ge/plugin_manager.cc \
../common/ge/op_tiling_manager.cc \
../graph/load/graph_loader.cc \ ../graph/load/graph_loader.cc \
../graph/execute/graph_execute.cc \ ../graph/execute/graph_execute.cc \
../omm/csa_interact.cc \ ../omm/csa_interact.cc \
@@ -45,6 +44,7 @@ local_ge_executor_src_files := \
../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
../graph/load/output/output.cc \
../single_op/single_op_manager.cc \ ../single_op/single_op_manager.cc \
../single_op/single_op_model.cc \ ../single_op/single_op_model.cc \
../single_op/single_op.cc \ ../single_op/single_op.cc \
@@ -53,7 +53,6 @@ local_ge_executor_src_files := \
../single_op/task/build_task_utils.cc \ ../single_op/task/build_task_utils.cc \
../single_op/task/tbe_task_builder.cc \ ../single_op/task/tbe_task_builder.cc \
../single_op/task/aicpu_task_builder.cc \ ../single_op/task/aicpu_task_builder.cc \
../single_op/task/aicpu_kernel_task_builder.cc \
../hybrid/hybrid_davinci_model_stub.cc\ ../hybrid/hybrid_davinci_model_stub.cc\


local_ge_executor_c_include := \ local_ge_executor_c_include := \


+ 4
- 8
src/ge/ge_inference.mk View File

@@ -32,7 +32,6 @@ COMMON_LOCAL_SRC_FILES := \


GRAPH_MANAGER_LOCAL_SRC_FILES := \ GRAPH_MANAGER_LOCAL_SRC_FILES := \
common/ge/plugin_manager.cc\ common/ge/plugin_manager.cc\
common/ge/op_tiling_manager.cc\
init/gelib.cc \ init/gelib.cc \
session/inner_session.cc \ session/inner_session.cc \
session/session_manager.cc \ session/session_manager.cc \
@@ -92,7 +91,6 @@ OMG_HOST_SRC_FILES := \
graph/passes/no_use_reshape_remove_pass.cc \ graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \ graph/passes/iterator_op_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \ graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/common/omg_util.cc \ graph/common/omg_util.cc \
graph/common/bcast.cc \ graph/common/bcast.cc \
graph/passes/dimension_compute_pass.cc \ graph/passes/dimension_compute_pass.cc \
@@ -147,7 +145,6 @@ OMG_HOST_SRC_FILES := \
graph/passes/stop_gradient_pass.cc \ graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \ graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \ graph/passes/identity_pass.cc \
graph/passes/ref_identity_delete_op_pass.cc \
graph/passes/placeholder_with_default_pass.cc \ graph/passes/placeholder_with_default_pass.cc \
graph/passes/snapshot_pass.cc \ graph/passes/snapshot_pass.cc \
graph/passes/guarantee_const_pass.cc \ graph/passes/guarantee_const_pass.cc \
@@ -156,9 +153,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/folding_pass.cc \ graph/passes/folding_pass.cc \
graph/passes/cast_translate_pass.cc \ graph/passes/cast_translate_pass.cc \
graph/passes/prune_pass.cc \ graph/passes/prune_pass.cc \
graph/passes/merge_to_stream_merge_pass.cc \
graph/passes/switch_to_stream_switch_pass.cc \
graph/passes/attach_stream_label_pass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/multi_batch_pass.cc \ graph/passes/multi_batch_pass.cc \
graph/passes/next_iteration_pass.cc \ graph/passes/next_iteration_pass.cc \
graph/passes/control_trigger_pass.cc \ graph/passes/control_trigger_pass.cc \
@@ -178,6 +173,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/variable_op_pass.cc \ graph/passes/variable_op_pass.cc \
graph/passes/cast_remove_pass.cc \ graph/passes/cast_remove_pass.cc \
graph/passes/transpose_transdata_pass.cc \ graph/passes/transpose_transdata_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/hccl_memcpy_pass.cc \ graph/passes/hccl_memcpy_pass.cc \
graph/passes/flow_ctrl_pass.cc \ graph/passes/flow_ctrl_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \ graph/passes/link_gen_mask_nodes_pass.cc \
@@ -203,6 +199,7 @@ OME_HOST_SRC_FILES := \
graph/load/new_model_manager/tbe_handle_store.cc \ graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \ graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \ graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/load/new_model_manager/data_dumper.cc \ graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \ graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \ graph/load/new_model_manager/task_info/event_record_task_info.cc \
@@ -227,7 +224,6 @@ OME_HOST_SRC_FILES := \
single_op/task/build_task_utils.cc \ single_op/task/build_task_utils.cc \
single_op/task/tbe_task_builder.cc \ single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \ single_op/task/aicpu_task_builder.cc \
single_op/task/aicpu_kernel_task_builder.cc \
single_op/single_op.cc \ single_op/single_op.cc \
single_op/single_op_model.cc \ single_op/single_op_model.cc \
single_op/stream_resource.cc \ single_op/stream_resource.cc \
@@ -372,7 +368,7 @@ endif


LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)


LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_ir_build.cc
LOCAL_SRC_FILES := ../../out/atc/lib64/stub/ge_ir_build.cc




LOCAL_SHARED_LIBRARIES := LOCAL_SHARED_LIBRARIES :=


+ 3
- 36
src/ge/ge_runner.mk View File

@@ -23,7 +23,6 @@ LIBGE_LOCAL_SRC_FILES := \
common/formats/utils/formats_trans_utils.cc \ common/formats/utils/formats_trans_utils.cc \
common/fp16_t.cc \ common/fp16_t.cc \
common/ge/plugin_manager.cc\ common/ge/plugin_manager.cc\
common/ge/op_tiling_manager.cc\
common/helper/model_cache_helper.cc \ common/helper/model_cache_helper.cc \
common/profiling/profiling_manager.cc \ common/profiling/profiling_manager.cc \
engine_manager/dnnengine_manager.cc \ engine_manager/dnnengine_manager.cc \
@@ -78,6 +77,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/load/new_model_manager/task_info/task_info.cc \ graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/tbe_handle_store.cc \ graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/zero_copy_task.cc \ graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/manager/graph_context.cc \ graph/manager/graph_context.cc \
graph/manager/graph_manager.cc \ graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \ graph/manager/graph_manager_utils.cc \
@@ -99,7 +99,6 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/aicpu_constant_folding_pass.cc \ graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/assert_pass.cc \ graph/passes/assert_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \ graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/partition/dynamic_shape_partition.cc \ graph/partition/dynamic_shape_partition.cc \
graph/passes/base_pass.cc \ graph/passes/base_pass.cc \
graph/passes/cast_remove_pass.cc \ graph/passes/cast_remove_pass.cc \
@@ -159,8 +158,8 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/get_original_format_pass.cc \ graph/passes/get_original_format_pass.cc \
graph/passes/guarantee_const_pass.cc \ graph/passes/guarantee_const_pass.cc \
graph/passes/hccl_memcpy_pass.cc \ graph/passes/hccl_memcpy_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/identity_pass.cc \ graph/passes/identity_pass.cc \
graph/passes/ref_identity_delete_op_pass.cc \
graph/passes/infershape_pass.cc \ graph/passes/infershape_pass.cc \
graph/passes/isolated_op_remove_pass.cc \ graph/passes/isolated_op_remove_pass.cc \
graph/passes/iterator_op_pass.cc \ graph/passes/iterator_op_pass.cc \
@@ -192,9 +191,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/data_pass.cc \ graph/passes/data_pass.cc \
graph/passes/switch_data_edges_bypass.cc \ graph/passes/switch_data_edges_bypass.cc \
graph/passes/switch_logic_remove_pass.cc \ graph/passes/switch_logic_remove_pass.cc \
graph/passes/merge_to_stream_merge_pass.cc \
graph/passes/switch_to_stream_switch_pass.cc \
graph/passes/attach_stream_label_pass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \ graph/passes/switch_dead_branch_elimination.cc \
graph/passes/replace_transshape_pass.cc \ graph/passes/replace_transshape_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \ graph/passes/transop_breadth_fusion_pass.cc \
@@ -233,7 +230,6 @@ LIBGE_LOCAL_SRC_FILES := \
single_op/task/op_task.cc \ single_op/task/op_task.cc \
single_op/task/tbe_task_builder.cc \ single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \ single_op/task/aicpu_task_builder.cc \
single_op/task/aicpu_kernel_task_builder.cc \
hybrid/common/tensor_value.cc \ hybrid/common/tensor_value.cc \
hybrid/common/npu_memory_allocator.cc \ hybrid/common/npu_memory_allocator.cc \
hybrid/executor/rt_callback_manager.cc \ hybrid/executor/rt_callback_manager.cc \
@@ -243,15 +239,12 @@ LIBGE_LOCAL_SRC_FILES := \
hybrid/executor/hybrid_model_executor.cc \ hybrid/executor/hybrid_model_executor.cc \
hybrid/executor/hybrid_model_async_executor.cc \ hybrid/executor/hybrid_model_async_executor.cc \
hybrid/executor/hybrid_execution_context.cc \ hybrid/executor/hybrid_execution_context.cc \
hybrid/executor/subgraph_context.cc \
hybrid/executor/subgraph_executor.cc \
hybrid/executor/worker/task_compile_engine.cc \ hybrid/executor/worker/task_compile_engine.cc \
hybrid/executor/worker/shape_inference_engine.cc \ hybrid/executor/worker/shape_inference_engine.cc \
hybrid/executor/worker/execution_engine.cc \ hybrid/executor/worker/execution_engine.cc \
hybrid/model/hybrid_model.cc \ hybrid/model/hybrid_model.cc \
hybrid/model/hybrid_model_builder.cc \ hybrid/model/hybrid_model_builder.cc \
hybrid/model/node_item.cc \ hybrid/model/node_item.cc \
hybrid/model/graph_item.cc \
hybrid/node_executor/aicore/aicore_node_executor.cc \ hybrid/node_executor/aicore/aicore_node_executor.cc \
hybrid/node_executor/aicore/aicore_op_task.cc \ hybrid/node_executor/aicore/aicore_op_task.cc \
hybrid/node_executor/aicore/aicore_task_builder.cc \ hybrid/node_executor/aicore/aicore_task_builder.cc \
@@ -260,9 +253,6 @@ LIBGE_LOCAL_SRC_FILES := \
hybrid/node_executor/aicpu/aicpu_node_executor.cc \ hybrid/node_executor/aicpu/aicpu_node_executor.cc \
hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ hybrid/node_executor/compiledsubgraph/known_node_executor.cc \
hybrid/node_executor/hostcpu/ge_local_node_executor.cc \ hybrid/node_executor/hostcpu/ge_local_node_executor.cc \
hybrid/node_executor/controlop/control_op_executor.cc \
hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
hybrid/node_executor/hccl/hccl_node_executor.cc \
hybrid/node_executor/node_executor.cc \ hybrid/node_executor/node_executor.cc \
hybrid/node_executor/task_context.cc \ hybrid/node_executor/task_context.cc \
hybrid/hybrid_davinci_model.cc \ hybrid/hybrid_davinci_model.cc \
@@ -348,28 +338,6 @@ LOCAL_SHARED_LIBRARIES += \


include $(BUILD_HOST_SHARED_LIBRARY) include $(BUILD_HOST_SHARED_LIBRARY)


#compiler for GeRunner
include $(CLEAR_VARS)

LOCAL_MODULE := stub/libge_runner

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif


LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc


LOCAL_SHARED_LIBRARIES :=

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_SHARED_LIBRARY)


# add engine_conf.json to host # add engine_conf.json to host
include $(CLEAR_VARS) include $(CLEAR_VARS)
@@ -439,7 +407,6 @@ LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD
LOCAL_CFLAGS += -g -O0 LOCAL_CFLAGS += -g -O0


LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)




+ 333
- 0
src/ge/ge_train.mk View File

@@ -0,0 +1,333 @@
LOCAL_PATH := $(call my-dir)

COMMON_LOCAL_SRC_FILES := \
proto/fusion_model.proto \
proto/optimizer_priority.proto \
session/inner_session.cc \
session/session_manager.cc \
common/ge/plugin_manager.cc\
common/fp16_t.cc \
common/formats/utils/formats_trans_utils.cc \
common/formats/format_transfers/datatype_transfer.cc \
common/formats/format_transfers/format_transfer_transpose.cc \
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_fractal_z.cc \
common/formats/format_transfers/format_transfer_fractal_nz.cc \
common/formats/format_transfers/format_transfer_fractal_zz.cc \
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
common/formats/format_transfers/format_transfer_fracz_nchw.cc \
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
common/formats/formats.cc \
init/gelib.cc \
engine_manager/dnnengine_manager.cc \
opskernel_manager/ops_kernel_manager.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
graph/manager/graph_context.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/multi_batch_copy_graph.cc \
graph/execute/graph_execute.cc \
graph/load/graph_loader.cc \
graph/optimize/graph_optimize.cc \
graph/passes/folding_pass.cc \
graph/optimize/summary_optimize.cc \
graph/build/graph_builder.cc \
graph/partition/engine_place.cc \
graph/partition/graph_partition.cc \
graph/partition/dynamic_shape_partition.cc \
generator/ge_generator.cc \
generator/generator_api.cc \
common/profiling/profiling_manager.cc \
ge_local_engine/engine/host_cpu_engine.cc \
common/helper/model_cache_helper.cc \

OMG_HOST_SRC_FILES := \
model/ge_model.cc \
model/ge_root_model.cc \
graph/common/transop_util.cc \
graph/manager/graph_var_manager.cc \
graph/manager/trans_var_data_utils.cc \
omm/csa_interact.cc \
graph/passes/pass_manager.cc \
graph/passes/pass_utils.cc \
graph/passes/base_pass.cc \
graph/passes/resource_pair_add_control_pass.cc \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/reshape_remove_pass.cc \
graph/passes/reshape_recovery_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \
graph/passes/transop_depth_fusion_pass.cc \
graph/passes/same_transdata_breadth_fusion_pass.cc \
graph/passes/transop_without_reshape_fusion_pass.cc \
graph/passes/compile_nodes_pass.cc \
graph/passes/transop_nearby_allreduce_fusion_pass.cc \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/passes/variable_op_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/transpose_transdata_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/variable_format_pass.cc \
graph/passes/subgraph_pass.cc \
graph/passes/data_pass.cc \
graph/passes/net_output_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/optimize/optimizer/allreduce_fusion_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/shape_operate_op_remove_pass.cc \
graph/passes/unused_op_remove_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/infershape_pass.cc \
graph/passes/unused_const_pass.cc \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/permute_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/gather_v2_kernel.cc \
host_kernels/maximum_kernel.cc \
host_kernels/floormod_kernel.cc \
host_kernels/floordiv_kernel.cc \
host_kernels/range_kernel.cc \
host_kernels/shape_kernel.cc \
host_kernels/size_kernel.cc \
host_kernels/shape_n_kernel.cc \
host_kernels/rank_kernel.cc \
host_kernels/broadcast_args_kernel.cc \
host_kernels/fill_kernel.cc \
host_kernels/empty_kernel.cc \
host_kernels/expanddims_kernel.cc \
host_kernels/reshape_kernel.cc \
host_kernels/squeeze_kernel.cc \
host_kernels/kernel_utils.cc \
host_kernels/cast_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/transpose_kernel.cc \
host_kernels/permute_kernel.cc \
host_kernels/pack_kernel.cc \
host_kernels/concat_v2_kernel.cc \
host_kernels/concat_offset_kernel.cc \
host_kernels/strided_slice_kernel.cc \
host_kernels/ssd_prior_box_kernel.cc \
host_kernels/add_kernel.cc \
host_kernels/unpack_kernel.cc \
host_kernels/sub_kernel.cc \
host_kernels/mul_kernel.cc \
host_kernels/reduce_prod_kernel.cc \
host_kernels/rsqrt_kernel.cc \
host_kernels/slice_kernel.cc \
host_kernels/slice_d_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \
graph/passes/placeholder_with_default_pass.cc \
graph/passes/snapshot_pass.cc \
graph/passes/guarantee_const_pass.cc \
graph/passes/var_is_initialized_op_pass.cc \
graph/passes/parallel_concat_start_op_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \
graph/passes/save_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \
graph/passes/merge_pass.cc \
graph/passes/prune_pass.cc \
graph/passes/flow_ctrl_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/switch_data_edges_bypass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/multi_batch_pass.cc \
graph/passes/switch_logic_remove_pass.cc \
graph/passes/next_iteration_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/hccl_memcpy_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \
graph/passes/replace_with_empty_const_pass.cc \
graph/passes/hccl_group_pass.cc \

OME_SRC_FILES := \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \
graph/manager/model_manager/event_manager.cc \
graph/manager/util/debug.cc \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/manager/util/hcom_util.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/hccl_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
single_op/task/op_task.cc \
single_op/task/build_task_utils.cc \
single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \
single_op/single_op.cc \
single_op/single_op_model.cc \
single_op/stream_resource.cc \
single_op/single_op_manager.cc \
hybrid/hybrid_davinci_model_stub.cc \


COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/task.proto \
proto/insert_op.proto \
proto/ge_ir.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
$(LOCAL_PATH) ./ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/runtime \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

NEW_OMG_HOST_SRC_FILES := \
graph/preprocess/insert_op/util_insert_aipp_op.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/build/model_builder.cc \
graph/build/task_generator.cc \
graph/build/stream_allocator.cc \
graph/build/logical_stream_allocator.cc \
graph/build/stream_graph_optimizer.cc \
graph/build/run_context.cc \
graph/build/label_allocator.cc \
graph/label/label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/case_label_maker.cc \
graph/label/while_label_maker.cc \
graph/label/partitioned_call_label_maker.cc \



#compiler for host train
include $(CLEAR_VARS)

LOCAL_MODULE := libge_train

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DDAVINCI_CLOUD -DDAVINCI_TRAIN -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING
LOCAL_CFLAGS += -DFMK_SUPPORT_DEBUG
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(OME_SRC_FILES)
LOCAL_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
libhccl \
libmsprof \


LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_HOST_SHARED_LIBRARY)

# add engine_conf.json to host
include $(CLEAR_VARS)

LOCAL_MODULE := engine_conf.json

LOCAL_SRC_FILES := engine_manager/engine_conf.json

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json
include $(BUILD_HOST_PREBUILT)

# add optimizer_priority.pbtxt to host
include $(CLEAR_VARS)

LOCAL_MODULE := optimizer_priority.pbtxt

LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt
include $(BUILD_HOST_PREBUILT)

+ 1
- 134
src/ge/generator/ge_generator.cc View File

@@ -207,13 +207,6 @@ class GeGenerator::Impl {
GraphManager graph_manager_; GraphManager graph_manager_;
SaveParam save_param_; SaveParam save_param_;
bool is_offline_ = true; bool is_offline_ = true;

private:
static std::string Trim(const std::string &str);
bool ParseVersion(const std::string &line, std::string &version);
bool GetVersionFromPath(const std::string &file_path, std::string &version);
bool SetAtcVersionInfo(AttrHolder &obj);
bool SetOppVersionInfo(AttrHolder &obj);
}; };


Status GeGenerator::Initialize(const map<string, string> &options) { Status GeGenerator::Initialize(const map<string, string> &options) {
@@ -295,124 +288,6 @@ Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) {
return SUCCESS; return SUCCESS;
} }


// Remove the space and tab before and after the string
std::string GeGenerator::Impl::Trim(const std::string &str) {
if (str.empty()) {
return str;
}

std::string::size_type start = str.find_first_not_of(" \t\r\n");
if (start == std::string::npos) {
return str;
}

std::string::size_type end = str.find_last_not_of(" \t\r\n") + 1;
return str.substr(start, end);
}

// Parsing the command line
bool GeGenerator::Impl::ParseVersion(const std::string &line, std::string &version) {
std::string flag = "Version=";
std::string temp = Trim(line);

if (temp.empty()) {
GELOGW("line is empty.");
return false;
}

std::string::size_type pos = temp.find(flag);
if (pos == std::string::npos) {
GELOGW("Incorrect line [%s], it must include [%s].", line.c_str(), flag.c_str());
return false;
}

if (temp.size() == flag.size()) {
GELOGW("version information is empty. %s", line.c_str());
return false;
}

version = temp.substr(pos + flag.size());
GELOGI("Version=%s", version.c_str());

return true;
}

bool GeGenerator::Impl::GetVersionFromPath(const std::string &file_path, std::string &version) {
// Normalize the path
string resolved_file_path = RealPath(file_path.c_str());
if (resolved_file_path.empty()) {
GELOGW("Invalid input file path [%s], make sure that the file path is correct.", file_path.c_str());
return false;
}
std::ifstream fs(resolved_file_path, std::ifstream::in);
if (!fs.is_open()) {
GELOGW("Open %s failed.", file_path.c_str());
return false;
}

std::string line;
if (getline(fs, line)) {
if (!ParseVersion(line, version)) {
GELOGW("Parse version failed. content is [%s].", line.c_str());
fs.close();
return false;
}
} else {
GELOGW("No version information found in the file path:%s", file_path.c_str());
fs.close();
return false;
}

fs.close(); // close the file
return true;
}

// Set package version information in the model
bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) {
std::string path_base = ge::GELib::GetPath();
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);

std::string version_path = path_base + "version.info";
GELOGI("version_path is %s", version_path.c_str());
std::string version;
if (!GetVersionFromPath(version_path, version)) {
GELOGW("Get atc version information failed!");
return false;
}
// set version info
if (!ge::AttrUtils::SetStr(obj, ATTR_MODEL_ATC_VERSION, version)) {
GELOGW("Ge model set atc version failed!");
return false;
}
GELOGI("Ge model set atc version information success.");
return true;
}

// Set package version information in the model
bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env == nullptr) {
GELOGW("Get environment variable ASCEND_OPP_PATH failed!");
return false;
}
std::string version_path = path_env;
version_path += "/version.info";
GELOGI("version_path is %s", version_path.c_str());
std::string version;
if (!GetVersionFromPath(version_path, version)) {
GELOGW("Get opp version information failed!");
return false;
}
// set version info
if (!ge::AttrUtils::SetStr(obj, ATTR_MODEL_OPP_VERSION, version)) {
GELOGW("Ge model set opp version failed!");
return false;
}
GELOGI("Ge Model set opp version information success.");
return true;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) { ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr; rtContext_t ctx = nullptr;
@@ -440,7 +315,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
string model_name = ""; string model_name = "";
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name); Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name);
if (name_ret != SUCCESS) { if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid"); GELOGE(FAILED, "Get model_name failed. Param --output is invalid");
return PARAM_INVALID; return PARAM_INVALID;
} }
@@ -590,14 +464,6 @@ Status GeGenerator::Impl::SaveParams(GeModelPtr &ge_model, const string &type, c
} }


Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr &model, ModelBufferData &model_buff) { Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr &model, ModelBufferData &model_buff) {
// set atc version
if (!SetAtcVersionInfo(*(model.get()))) {
GELOGW("SetPackageVersionInfo of atc failed!");
}
// set opp version
if (!SetOppVersionInfo(*(model.get()))) {
GELOGW("SetPackageVersionInfo of ops failed!");
}
ModelHelper model_helper; ModelHelper model_helper;
model_helper.SetSaveMode(is_offline_); model_helper.SetSaveMode(is_offline_);
Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff); Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff);
@@ -660,4 +526,5 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g


return SUCCESS; return SUCCESS;
} }

} // namespace ge } // namespace ge

+ 0
- 48
src/ge/graph/common/ge_call_wrapper.h View File

@@ -18,41 +18,6 @@
#define GE_GE_CALL_WRAPPER_H_ #define GE_GE_CALL_WRAPPER_H_
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"


#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap()

#define GE_TIMESTAMP_END(stage, stage_name) \
do { \
uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \
GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \
(endUsec_##stage - startUsec_##stage)); \
} while (0);

#define GE_TIMESTAMP_EVENT_END(stage, stage_name) \
do { \
uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \
(endUsec_##stage - startUsec_##stage)); \
} while (0);

#define GE_TIMESTAMP_CALLNUM_START(stage) \
uint64_t startUsec_##stage = ge::GetCurrentTimestap(); \
uint64_t call_num_of##stage = 0; \
uint64_t time_of##stage = 0

#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestap())

#define GE_TIMESTAMP_ADD(stage) \
time_of##stage += ge::GetCurrentTimestap() - startUsec_##stage; \
call_num_of##stage++

#define GE_TIMESTAMP_CALLNUM_END(stage, stage_name) \
GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \
call_num_of##stage)

#define GE_TIMESTAMP_CALLNUM_EVENT_END(stage, stage_name) \
GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \
call_num_of##stage)

#define RUN_WITH_TIMESTAMP_NAME(var_name, prefix, func, ...) \ #define RUN_WITH_TIMESTAMP_NAME(var_name, prefix, func, ...) \
do { \ do { \
GE_TIMESTAMP_START(var_name); \ GE_TIMESTAMP_START(var_name); \
@@ -64,23 +29,10 @@
} \ } \
} while (0) } while (0)


#define RUN_WITH_PERF_TIMESTAMP_NAME(var_name, prefix, func, ...) \
do { \
GE_TIMESTAMP_START(var_name); \
auto ret_inner_macro = func(__VA_ARGS__); \
GE_TIMESTAMP_EVENT_END(var_name, #prefix "::" #func) \
if (ret_inner_macro != ge::SUCCESS) { \
GELOGE(ret_inner_macro, "Failed to process " #prefix "_" #func); \
return ret_inner_macro; \
} \
} while (0)

#define JOIN_NAME_INNER(a, b) a##b #define JOIN_NAME_INNER(a, b) a##b
#define JOIN_NAME(a, b) JOIN_NAME_INNER(a, b) #define JOIN_NAME(a, b) JOIN_NAME_INNER(a, b)
#define COUNTER_NAME(a) JOIN_NAME(a, __COUNTER__) #define COUNTER_NAME(a) JOIN_NAME(a, __COUNTER__)
#define GE_RUN(prefix, func, ...) \ #define GE_RUN(prefix, func, ...) \
RUN_WITH_TIMESTAMP_NAME(COUNTER_NAME(ge_timestamp_##prefix), prefix, func, __VA_ARGS__) RUN_WITH_TIMESTAMP_NAME(COUNTER_NAME(ge_timestamp_##prefix), prefix, func, __VA_ARGS__)
#define GE_RUN_PERF(prefix, func, ...) \
RUN_WITH_PERF_TIMESTAMP_NAME(COUNTER_NAME(ge_timestamp_##prefix), prefix, func, __VA_ARGS__)


#endif // GE_GE_CALL_WRAPPER_H_ #endif // GE_GE_CALL_WRAPPER_H_

+ 4
- 10
src/ge/graph/execute/graph_execute.cc View File

@@ -120,7 +120,7 @@ Status GraphExecutor::FreeInOutBuffer() {
} }
} }


Status GraphExecutor::MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr) {
Status GraphExecutor::MallocInOutBuffer(const std::vector<uint32_t> &buffer_size, std::vector<void *> &data_addr) {
if (malloc_flag_) { if (malloc_flag_) {
auto all_size_same = true; auto all_size_same = true;
if (buffer_size.size() == buffer_size_.size()) { if (buffer_size.size() == buffer_size_.size()) {
@@ -169,7 +169,7 @@ Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor
graph_input_data.timestamp = 0; graph_input_data.timestamp = 0;
std::size_t inputSize = input_tensor.size(); std::size_t inputSize = input_tensor.size();
std::size_t output_size = output_desc.size(); std::size_t output_size = output_desc.size();
std::vector<uint64_t> bufferSizeVec;
std::vector<uint32_t> bufferSizeVec;
std::vector<void *> addrVec; std::vector<void *> addrVec;


for (std::size_t i = 0; i < inputSize; ++i) { for (std::size_t i = 0; i < inputSize; ++i) {
@@ -211,7 +211,7 @@ Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor


for (std::size_t j = 0; j < output_size; j++) { for (std::size_t j = 0; j < output_size; j++) {
auto desc = output_desc[j]; auto desc = output_desc[j];
uint64_t buffer_size = desc.size;
uint32_t buffer_size = desc.size;


DataBuffer out_data_buf; DataBuffer out_data_buf;
out_data_buf.data = reinterpret_cast<uint8_t *>(addrVec[inputSize + j]); out_data_buf.data = reinterpret_cast<uint8_t *>(addrVec[inputSize + j]);
@@ -225,13 +225,6 @@ Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor


Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor, Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor,
std::vector<GeTensor> &output_tensor) { std::vector<GeTensor> &output_tensor) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
if (model_manager->IsDynamicShape(model_id)) {
GELOGI("[ExecuteGraph] GetInputOutputDescInfo via dynamic shape model executor, modelId=%u", model_id);
return model_manager->SyncExecuteModel(model_id, input_tensor, output_tensor);
}

// Prepare input and output // Prepare input and output
std::vector<InputOutputDescInfo> inputs_desc; std::vector<InputOutputDescInfo> inputs_desc;
std::vector<InputOutputDescInfo> output_desc; std::vector<InputOutputDescInfo> output_desc;
@@ -582,4 +575,5 @@ Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t inde


return SUCCESS; return SUCCESS;
} }

} // namespace ge } // namespace ge

+ 2
- 2
src/ge/graph/execute/graph_execute.h View File

@@ -110,7 +110,7 @@ class GraphExecutor {


Status FreeInOutBuffer(); Status FreeInOutBuffer();


Status MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr);
Status MallocInOutBuffer(const std::vector<uint32_t> &buffer_size, std::vector<void *> &data_addr);


bool init_flag_; bool init_flag_;


@@ -129,7 +129,7 @@ class GraphExecutor {


bool malloc_flag_; bool malloc_flag_;
std::vector<void *> buffer_addr_; std::vector<void *> buffer_addr_;
std::vector<uint64_t> buffer_size_;
std::vector<uint32_t> buffer_size_;
}; };
} // namespace ge } // namespace ge




+ 1
- 2
src/ge/graph/load/graph_loader.cc View File

@@ -350,8 +350,7 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) {
return RT_FAILED; return RT_FAILED;
} }
// Add small page memory size // Add small page memory size
free =
static_cast<int64_t>(free_mem + VarManager::Instance(GetContext().SessionId())->GetUseMaxMemorySize() - total_mem);
free = static_cast<int64_t>(free_mem + VarManager::Instance(0)->GetUseMaxMemorySize() - total_mem);
GELOGI("GetMemoryInfo free[%zu], total[%zu], return free[%ld]", free_mem, total_mem, free); GELOGI("GetMemoryInfo free[%zu], total[%zu], return free[%ld]", free_mem, total_mem, free);
return SUCCESS; return SUCCESS;
} }


+ 1
- 1
src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc View File

@@ -339,7 +339,7 @@ Status CpuTaskActiveEntry::Distribute() {
return RT_FAILED; return RT_FAILED;
} }


GELOGI("Cpu kernel launch active entry task success.");
GELOGI("Cpu kernel launch wait end task success.");
return SUCCESS; return SUCCESS;
} }




+ 86
- 280
src/ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -21,6 +21,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>


#include "common/debug/log.h"
#include "common/properties_manager.h" #include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
#include "framework/common/util.h" #include "framework/common/util.h"
@@ -36,36 +37,9 @@
namespace { namespace {
const uint32_t kAicpuLoadFlag = 1; const uint32_t kAicpuLoadFlag = 1;
const uint32_t kAicpuUnloadFlag = 0; const uint32_t kAicpuUnloadFlag = 0;
const int64_t kOpDebugSize = 2048;
const int64_t kOpDebugShape = 2048;
const int8_t kDecimal = 10;
const uint32_t kAddrLen = sizeof(void *);
const char *const kDumpOutput = "output"; const char *const kDumpOutput = "output";
const char *const kDumpInput = "input"; const char *const kDumpInput = "input";
const char *const kDumpAll = "all"; const char *const kDumpAll = "all";

// parse for format like nodename:input:index
static bool ParseNameIndex(const std::string &node_name_index, std::string &node_name, std::string &input_or_output,
size_t &index) {
auto sep = node_name_index.rfind(':');
if (sep == std::string::npos) {
return false;
}
auto index_str = node_name_index.substr(sep + 1);
index = static_cast<size_t>(std::strtol(index_str.c_str(), nullptr, kDecimal));
auto node_name_without_index = node_name_index.substr(0, sep);
sep = node_name_without_index.rfind(':');
if (sep == std::string::npos) {
return false;
}
node_name = node_name_without_index.substr(0, sep);
input_or_output = node_name_without_index.substr(sep + 1);
return !(input_or_output != kDumpInput && input_or_output != kDumpOutput);
}

static bool IsTensorDescWithSkipDumpAddrType(bool has_mem_type_attr, vector<int64_t> v_memory_type, size_t i) {
return has_mem_type_attr && (v_memory_type[i] == RT_MEMORY_L1);
}
} // namespace } // namespace


static int32_t GetIrDataType(ge::DataType data_type) { static int32_t GetIrDataType(ge::DataType data_type) {
@@ -164,13 +138,6 @@ void DataDumper::SaveEndGraphId(uint32_t task_id, uint32_t stream_id) {
end_graph_stream_id_ = stream_id; end_graph_stream_id_ = stream_id;
} }


void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, bool is_op_debug) {
op_debug_task_id_ = task_id;
op_debug_stream_id_ = stream_id;
op_debug_addr_ = op_debug_addr;
is_op_debug_ = is_op_debug;
}

void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc,
uintptr_t args) { uintptr_t args) {
if (op_desc == nullptr) { if (op_desc == nullptr) {
@@ -235,121 +202,56 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin
} }
} }


Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr, size_t index) {
output.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
output.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));
Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
GELOGI("Start dump output");
if (inner_dump_info.is_task) {
// tbe or aicpu op
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
const auto input_size = inner_dump_info.op->GetAllInputsDesc().size();
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op, false);
if (output_descs.size() != output_addrs.size()) {
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
inner_dump_info.op->GetName().c_str(), output_descs.size());
return PARAM_INVALID;
}


for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
return PARAM_INVALID;
}
GELOGD("Get output size in dump is %ld", output_size);
std::string origin_name;
int32_t origin_output_index = -1;
(void)AttrUtils::GetStr(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
(void)AttrUtils::GetInt(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
output.set_size(output_size);
output.set_original_name(origin_name);
output.set_original_output_index(origin_output_index);
output.set_original_output_format(static_cast<int32_t>(tensor_descs.at(index).GetOriginFormat()));
output.set_original_output_data_type(static_cast<int32_t>(tensor_descs.at(index).GetOriginDataType()));
output.set_address(static_cast<uint64_t>(addr));
return SUCCESS;
}
for (size_t i = 0; i < output_descs.size(); ++i) {
aicpu::dump::Output output;
output.set_data_type(static_cast<int32_t>(GetIrDataType(output_descs.at(i).GetDataType())));
output.set_format(static_cast<int32_t>(output_descs.at(i).GetFormat()));


Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output,
size_t i, const std::string &node_name_index) {
std::string dump_op_name;
std::string input_or_output;
size_t index;
// parser and find which node's input or output tensor desc is chosen for dump info
if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
GELOGE(PARAM_INVALID, "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
return PARAM_INVALID;
}
GE_CHECK_NOTNULL(compute_graph_);
auto replace_node = compute_graph_->FindNode(dump_op_name);
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
"Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s],"
" cannot find redirect node[%s].",
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
dump_op_name.c_str());
auto replace_opdesc = replace_node->GetOpDesc();
GE_CHECK_NOTNULL(replace_opdesc);
auto iter = ref_info_.find(replace_opdesc);
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
"Op [%s] output desc[%zu] cannot find any saved redirect node[%s]'s info.",
inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
GE_CHECK_NOTNULL(iter->second);
auto addr = reinterpret_cast<uintptr_t>(iter->second);
if (input_or_output == kDumpInput) {
const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
addr += kAddrLen * index;
GE_CHK_STATUS_RET(GenerateOutput(output, replace_input_descs, addr, index), "Generate output failed");
} else if (input_or_output == kDumpOutput) {
const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
addr += (index + replace_input_size) * kAddrLen;
GE_CHK_STATUS_RET(GenerateOutput(output, replace_output_descs, addr, index), "Generate output failed");
}
GELOGD("Op [%s] output desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
return SUCCESS;
}
for (auto dim : output_descs.at(i).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}


Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
if (output_descs.size() != output_addrs.size()) {
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
inner_dump_info.op->GetName().c_str(), output_descs.size());
return PARAM_INVALID;
}
std::vector<int64_t> v_memory_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type);
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != output_descs.size()),
"DumpOutputWithTask[%s], output size[%zu], output memory type size[%zu]",
inner_dump_info.op->GetName().c_str(), output_descs.size(),
v_memory_type.size());

for (size_t i = 0; i < output_descs.size(); ++i) {
aicpu::dump::Output output;
std::string node_name_index;
const auto &output_desc = output_descs.at(i);
// check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF
if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) {
GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed");
} else {
GE_IF_BOOL_EXEC(
IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i),
GELOGD("DumpOutputWithTask[%s] output[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i);
continue;);

const auto input_size = inner_dump_info.op->GetInputsSize();
auto addr = inner_dump_info.args + (i + input_size) * kAddrLen;
GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed");
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
return PARAM_INVALID;
}
GELOGI("Get output size in dump is %ld", output_size);
std::string origin_name;
int32_t origin_output_index = -1;
(void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
(void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size);
return PARAM_INVALID)
output.set_size(output_size);
output.set_original_name(origin_name);
output.set_original_output_index(origin_output_index);
output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat()));
output.set_original_output_data_type(static_cast<int32_t>(output_descs.at(i).GetOriginDataType()));
output.set_address(static_cast<uint64_t>(inner_dump_info.args + (i + input_size) * sizeof(void *)));

task.mutable_output()->Add(std::move(output));
} }
task.mutable_output()->Add(std::move(output));
return SUCCESS;
} }
return SUCCESS;
}


Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
GELOGI("Start dump output");
if (inner_dump_info.is_task) {
// tbe or aicpu op, these ops are with task
return DumpOutputWithTask(inner_dump_info, task);
}
// else data, const or variable op // else data, const or variable op
aicpu::dump::Output output; aicpu::dump::Output output;
auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index); auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op, false);
if (output_tensor == nullptr) { if (output_tensor == nullptr) {
GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index, GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
inner_dump_info.op->GetOutputsSize()); inner_dump_info.op->GetOutputsSize());
@@ -367,6 +269,9 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
int32_t origin_output_index = -1; int32_t origin_output_index = -1;
(void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
(void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0,
GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size);
return PARAM_INVALID)
output.set_size(inner_dump_info.data_size); output.set_size(inner_dump_info.data_size);
output.set_original_name(origin_name); output.set_original_name(origin_name);
output.set_original_output_index(origin_output_index); output.set_original_output_index(origin_output_index);
@@ -377,7 +282,7 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
GELOGE(FAILED, "Index is out of range."); GELOGE(FAILED, "Index is out of range.");
return FAILED; return FAILED;
} }
auto data_addr = inner_dump_info.args + kAddrLen * static_cast<uint32_t>(inner_dump_info.input_anchor_index);
auto data_addr = inner_dump_info.args + sizeof(void *) * static_cast<uint32_t>(inner_dump_info.input_anchor_index);
output.set_address(static_cast<uint64_t>(data_addr)); output.set_address(static_cast<uint64_t>(data_addr));


task.mutable_output()->Add(std::move(output)); task.mutable_output()->Add(std::move(output));
@@ -385,98 +290,37 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
return SUCCESS; return SUCCESS;
} }


Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr, size_t index) {
input.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
input.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));

for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);
} else if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get input size filed");
return PARAM_INVALID;
}
GELOGD("Get input size in dump is %ld", input_size);
input.set_size(input_size);
input.set_address(static_cast<uint64_t>(addr));
return SUCCESS;
}

Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i,
const std::string &node_name_index) {
std::string dump_op_name;
std::string input_or_output;
size_t index;
// parser and find which node's input or output tensor desc is chosen for dump info
if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
GELOGE(PARAM_INVALID, "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
return PARAM_INVALID;
}
GE_CHECK_NOTNULL(compute_graph_);
auto replace_node = compute_graph_->FindNode(dump_op_name);
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
"Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s],"
" cannot find redirect node[%s].",
inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
dump_op_name.c_str());
auto replace_opdesc = replace_node->GetOpDesc();
GE_CHECK_NOTNULL(replace_opdesc);
auto iter = ref_info_.find(replace_opdesc);
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
"Op [%s] input desc[%zu] cannot find any saved redirect node[%s]'s info.",
inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
GE_CHECK_NOTNULL(iter->second);
auto addr = reinterpret_cast<uintptr_t>(iter->second);
if (input_or_output == kDumpInput) {
const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
addr += kAddrLen * index;
GE_CHK_STATUS_RET(GenerateInput(input, replace_input_descs, addr, index), "Generate input failed");
} else if (input_or_output == kDumpOutput) {
const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
addr += (index + replace_input_size) * kAddrLen;
GE_CHK_STATUS_RET(GenerateInput(input, replace_output_descs, addr, index), "Generate input failed");
}
GELOGD("Op [%s] input desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
return SUCCESS;
}

Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
GELOGI("Start dump input"); GELOGI("Start dump input");
const auto &input_descs = inner_dump_info.op->GetAllInputsDesc(); const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op, false);
if (input_descs.size() != input_addrs.size()) { if (input_descs.size() != input_addrs.size()) {
GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(), GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
inner_dump_info.op->GetName().c_str(), input_descs.size()); inner_dump_info.op->GetName().c_str(), input_descs.size());
return PARAM_INVALID; return PARAM_INVALID;
} }
std::vector<int64_t> v_memory_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type);
GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != input_descs.size()),
"DumpInput[%s], input size[%zu], input memory type size[%zu]",
inner_dump_info.op->GetName().c_str(), input_descs.size(), v_memory_type.size());


for (size_t i = 0; i < input_descs.size(); ++i) { for (size_t i = 0; i < input_descs.size(); ++i) {
aicpu::dump::Input input; aicpu::dump::Input input;
std::string node_name_index;
// check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF
if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) {
GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed");
// normal dump without attr
} else {
GE_IF_BOOL_EXEC(IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i),
GELOGD("DumpInput[%s] input[%zu] is l1 addr, skip it", inner_dump_info.op->GetName().c_str(), i);
continue;);

auto addr = inner_dump_info.args + kAddrLen * i;
GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed");
input.set_data_type(static_cast<int32_t>(GetIrDataType(input_descs.at(i).GetDataType())));
input.set_format(static_cast<int32_t>(input_descs.at(i).GetFormat()));

for (auto dim : input_descs.at(i).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
} }

int64_t input_size = 0;
if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);
} else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get input size filed");
return PARAM_INVALID;
}
GELOGI("Get input size in dump is %ld", input_size);
GE_IF_BOOL_EXEC(input_size <= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size);
return PARAM_INVALID;)
input.set_size(input_size);
input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i));
task.mutable_input()->Add(std::move(input)); task.mutable_input()->Add(std::move(input));
} }
return SUCCESS; return SUCCESS;
@@ -556,38 +400,36 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_
GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
return SUCCESS; return SUCCESS;
} }

Status DataDumper::LoadDumpInfo() { Status DataDumper::LoadDumpInfo() {
std::string dump_list_key; std::string dump_list_key;
PrintCheckLog(dump_list_key); PrintCheckLog(dump_list_key);


if (op_list_.empty()) { if (op_list_.empty()) {
GELOGW("op_list_ is empty");
return SUCCESS;
} }


aicpu::dump::OpMappingInfo op_mapping_info; aicpu::dump::OpMappingInfo op_mapping_info;


auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id_) + "/";
op_mapping_info.set_dump_path(dump_path);
auto dump_path = PropertiesManager::Instance().GetDumpOutputPath();
op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/");
op_mapping_info.set_model_name(dump_list_key); op_mapping_info.set_model_name(dump_list_key);
op_mapping_info.set_model_id(model_id_); op_mapping_info.set_model_id(model_id_);
op_mapping_info.set_flag(kAicpuLoadFlag); op_mapping_info.set_flag(kAicpuLoadFlag);
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep());
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s and dump path is %s in load dump info", dump_properties_.GetDumpStep().c_str(),
GELOGI("Dump step is %s and dump path is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(),
dump_path.c_str()); dump_path.c_str());


for (const auto &op_iter : op_list_) { for (const auto &op_iter : op_list_) {
auto op_desc = op_iter.op;
GELOGD("Op %s in model %s begin to add task in op_mapping_info", op_desc->GetName().c_str(), dump_list_key.c_str());
aicpu::dump::Task task; aicpu::dump::Task task;
auto op_desc = op_iter.op;
task.set_end_graph(false); task.set_end_graph(false);
task.set_task_id(op_iter.task_id); task.set_task_id(op_iter.task_id);
task.set_stream_id(op_iter.stream_id); task.set_stream_id(op_iter.stream_id);
task.mutable_op()->set_op_name(op_desc->GetName()); task.mutable_op()->set_op_name(op_desc->GetName());
task.mutable_op()->set_op_type(op_desc->GetType()); task.mutable_op()->set_op_type(op_desc->GetType());


if (dump_properties_.GetDumpMode() == kDumpOutput) {
if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput) {
if (DumpOutput(op_iter, task) != SUCCESS) { if (DumpOutput(op_iter, task) != SUCCESS) {
GELOGE(FAILED, "Dump output failed"); GELOGE(FAILED, "Dump output failed");
return FAILED; return FAILED;
@@ -595,7 +437,7 @@ Status DataDumper::LoadDumpInfo() {
op_mapping_info.mutable_task()->Add(std::move(task)); op_mapping_info.mutable_task()->Add(std::move(task));
continue; continue;
} }
if (dump_properties_.GetDumpMode() == kDumpInput) {
if (PropertiesManager::Instance().GetDumpMode() == kDumpInput) {
if (op_iter.is_task) { if (op_iter.is_task) {
if (DumpInput(op_iter, task) != SUCCESS) { if (DumpInput(op_iter, task) != SUCCESS) {
GELOGE(FAILED, "Dump input failed"); GELOGE(FAILED, "Dump input failed");
@@ -605,7 +447,7 @@ Status DataDumper::LoadDumpInfo() {
op_mapping_info.mutable_task()->Add(std::move(task)); op_mapping_info.mutable_task()->Add(std::move(task));
continue; continue;
} }
if (dump_properties_.GetDumpMode() == kDumpAll) {
if (PropertiesManager::Instance().GetDumpMode() == kDumpAll) {
auto ret = DumpOutput(op_iter, task); auto ret = DumpOutput(op_iter, task);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(FAILED, "Dump output failed when in dumping all"); GELOGE(FAILED, "Dump output failed when in dumping all");
@@ -625,22 +467,19 @@ Status DataDumper::LoadDumpInfo() {


SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info); SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info);


SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info);

if (!op_list_.empty() || is_op_debug_) {
auto ret = ExecuteLoadDumpInfo(op_mapping_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Execute load dump info failed");
return FAILED;
}
auto ret = ExecuteLoadDumpInfo(op_mapping_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Execute load dump info failed");
return FAILED;
} }
return SUCCESS; return SUCCESS;
} }


void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
aicpu::dump::OpMappingInfo &op_mapping_info) { aicpu::dump::OpMappingInfo &op_mapping_info) {
if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput ||
dump_properties_.GetDumpMode() == kDumpAll) {
if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput ||
PropertiesManager::Instance().GetDumpMode() == kDumpInput ||
PropertiesManager::Instance().GetDumpMode() == kDumpAll) {
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
aicpu::dump::Task task; aicpu::dump::Task task;
task.set_end_graph(true); task.set_end_graph(true);
@@ -652,37 +491,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
} }
} }


void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
aicpu::dump::OpMappingInfo &op_mapping_info) {
if (is_op_debug_) {
GELOGI("add op_debug_info to aicpu, task_id is %u, stream_id is %u", task_id, stream_id);
aicpu::dump::Task task;
task.set_end_graph(false);
task.set_task_id(task_id);
task.set_stream_id(stream_id);
task.mutable_op()->set_op_name(NODE_NAME_OP_DEBUG);
task.mutable_op()->set_op_type(OP_TYPE_OP_DEBUG);

// set output
aicpu::dump::Output output;
output.set_data_type(DT_UINT8);
output.set_format(FORMAT_ND);

output.mutable_shape()->add_dim(kOpDebugShape);

output.set_original_name(NODE_NAME_OP_DEBUG);
output.set_original_output_index(0);
output.set_original_output_format(FORMAT_ND);
output.set_original_output_data_type(DT_UINT8);
// due to lhisi virtual addr bug, cannot use args now
output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr)));
output.set_size(kOpDebugSize);

task.mutable_output()->Add(std::move(output));
op_mapping_info.mutable_task()->Add(std::move(task));
}
}

Status DataDumper::UnloadDumpInfo() { Status DataDumper::UnloadDumpInfo() {
if (!load_flag_) { if (!load_flag_) {
GELOGI("No need to UnloadDumpInfo."); GELOGI("No need to UnloadDumpInfo.");
@@ -709,17 +517,15 @@ Status DataDumper::UnloadDumpInfo() {
} }


void DataDumper::PrintCheckLog(string &dump_list_key) { void DataDumper::PrintCheckLog(string &dump_list_key) {
std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel();
if (model_list.empty()) { if (model_list.empty()) {
GELOGI("No model need dump."); GELOGI("No model need dump.");
return; return;
} }


GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str());
bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); bool not_find_by_omname = model_list.find(om_name_) == model_list.end();
bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); bool not_find_by_modelname = model_list.find(model_name_) == model_list.end();
dump_list_key = not_find_by_omname ? model_name_ : om_name_;
GELOGI("%zu op need dump in %s.", op_list_.size(), dump_list_key.c_str());

if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
if (not_find_by_omname && not_find_by_modelname) { if (not_find_by_omname && not_find_by_modelname) {
std::string model_list_str; std::string model_list_str;
@@ -727,12 +533,12 @@ void DataDumper::PrintCheckLog(string &dump_list_key) {
model_list_str += "[" + model + "]."; model_list_str += "[" + model + "].";
} }


GELOGW("Model %s will not be set to dump, dump list: %s", dump_list_key.c_str(), model_list_str.c_str());
GELOGW("Model %s will not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str());
return; return;
} }
} }
std::set<std::string> config_dump_op_list = dump_properties_.GetPropertyValue(dump_list_key);
dump_list_key = not_find_by_omname ? model_name_ : om_name_;
std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key);
std::set<std::string> dump_op_list; std::set<std::string> dump_op_list;
for (auto &inner_dump_info : op_list_) { for (auto &inner_dump_info : op_list_) {
// oplist value OpDescPtr is not nullptr // oplist value OpDescPtr is not nullptr


+ 1
- 33
src/ge/graph/load/new_model_manager/data_dumper.h View File

@@ -23,9 +23,7 @@
#include <vector> #include <vector>


#include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_inner_error_codes.h"
#include "common/properties_manager.h"
#include "graph/node.h" #include "graph/node.h"
#include "graph/compute_graph.h"
#include "proto/ge_ir.pb.h" #include "proto/ge_ir.pb.h"
#include "proto/op_mapping_info.pb.h" #include "proto/op_mapping_info.pb.h"
#include "runtime/mem.h" #include "runtime/mem.h"
@@ -46,9 +44,7 @@ class DataDumper {
device_id_(0), device_id_(0),
global_step_(0), global_step_(0),
loop_per_iter_(0), loop_per_iter_(0),
loop_cond_(0),
compute_graph_(nullptr),
ref_info_() {}
loop_cond_(0) {}


~DataDumper(); ~DataDumper();


@@ -60,10 +56,6 @@ class DataDumper {


void SetDeviceId(uint32_t device_id) { device_id_ = device_id; } void SetDeviceId(uint32_t device_id) { device_id_ = device_id; }


void SetComputeGraph(const ComputeGraphPtr &compute_graph) { compute_graph_ = compute_graph; };

void SetRefInfo(const std::map<OpDescPtr, void *> &ref_info) { ref_info_ = ref_info; };

void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond);


void SaveDumpInput(const std::shared_ptr<Node> &node); void SaveDumpInput(const std::shared_ptr<Node> &node);
@@ -73,15 +65,11 @@ class DataDumper {
void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); void SaveEndGraphId(uint32_t task_id, uint32_t stream_id);


void SetOmName(const std::string &om_name) { om_name_ = om_name; } void SetOmName(const std::string &om_name) { om_name_ = om_name; }
void SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, bool is_op_debug);


Status LoadDumpInfo(); Status LoadDumpInfo();


Status UnloadDumpInfo(); Status UnloadDumpInfo();


void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
const DumpProperties &GetDumpProperties() const { return dump_properties_; }

private: private:
void ReleaseDevMem(void **ptr) noexcept; void ReleaseDevMem(void **ptr) noexcept;


@@ -109,32 +97,12 @@ class DataDumper {
uintptr_t global_step_; uintptr_t global_step_;
uintptr_t loop_per_iter_; uintptr_t loop_per_iter_;
uintptr_t loop_cond_; uintptr_t loop_cond_;
ComputeGraphPtr compute_graph_;
std::map<OpDescPtr, void *> ref_info_;

uint32_t op_debug_task_id_ = 0;
uint32_t op_debug_stream_id_ = 0;
void *op_debug_addr_ = nullptr;
bool is_op_debug_ = false;

DumpProperties dump_properties_;


Status DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
Status DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output, size_t i,
const std::string &node_name_index);
Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task);
Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i,
const std::string &node_name_index);
Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info);
void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
aicpu::dump::OpMappingInfo &op_mapping_info);
Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info);
Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr, size_t index);
Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
const uintptr_t &addr, size_t index);
}; };
struct DataDumper::InnerDumpInfo { struct DataDumper::InnerDumpInfo {
uint32_t task_id; uint32_t task_id;


+ 363
- 541
src/ge/graph/load/new_model_manager/davinci_model.cc
File diff suppressed because it is too large
View File


+ 36
- 80
src/ge/graph/load/new_model_manager/davinci_model.h View File

@@ -29,7 +29,6 @@
#include "common/helper/om_file_helper.h" #include "common/helper/om_file_helper.h"
#include "common/opskernel/ge_task_info.h" #include "common/opskernel/ge_task_info.h"
#include "common/types.h" #include "common/types.h"
#include "common/properties_manager.h"
#include "framework/common/util.h" #include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/data_dumper.h" #include "graph/load/new_model_manager/data_dumper.h"
@@ -48,10 +47,6 @@
#include "task_info/task_info.h" #include "task_info/task_info.h"


namespace ge { namespace ge {
// op debug need 2048 bits buffer
const size_t kOpDebugMemorySize = 2048UL;
const size_t kDebugP2pSize = 8UL;

typedef enum tagModelProcStage { typedef enum tagModelProcStage {
MODEL_LOAD_START = 1, MODEL_LOAD_START = 1,
MODEL_LOAD_END, MODEL_LOAD_END,
@@ -176,6 +171,13 @@ class DavinciModel {
// get session id // get session id
uint64_t SessionId() const { return runtime_param_.session_id; } uint64_t SessionId() const { return runtime_param_.session_id; }


vector<OpDescPtr> GetOpDesc() {
vector<OpDescPtr> opDescVector;
GE_IF_BOOL_EXEC(AttrUtils::GetListOpDesc(GetGeModel(), MODEL_ATTR_FUSION_MODEL_DEF, opDescVector),
GELOGI("get opDesc of opDescVector"));
return opDescVector;
}

// get model priority // get model priority
int32_t Priority() const { return priority_; } int32_t Priority() const { return priority_; }


@@ -246,9 +248,15 @@ class DavinciModel {
/// ///
Format GetFormat(); Format GetFormat();


rtModel_t GetRtModelHandle() const { return rt_model_handle_; }
rtModel_t GetRtModelHandle() {
rtModel_t res = rt_model_handle_;
return res;
}


rtStream_t GetRtModelStream() const { return rt_model_stream_; }
rtStream_t GetRtModelStream() {
rtModel_t res = rt_model_stream_;
return res;
}


uint64_t GetRtBaseAddr() const { return runtime_param_.logic_mem_base; } uint64_t GetRtBaseAddr() const { return runtime_param_.logic_mem_base; }


@@ -287,7 +295,7 @@ class DavinciModel {
/// @param [out] batch_info /// @param [out] batch_info
/// @return execute result /// @return execute result
/// ///
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info) const;
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info);


void GetCurShape(std::vector<int64_t> &batch_info); void GetCurShape(std::vector<int64_t> &batch_info);


@@ -336,9 +344,10 @@ class DavinciModel {
/// ///
/// @ingroup ge /// @ingroup ge
/// @brief dump all op input and output information /// @brief dump all op input and output information
/// @return void
/// @param [in] op_list model_id
/// @return Status
/// ///
void DumpOpInputOutput();
Status DumpOpInputOutput();


/// ///
/// @ingroup ge /// @ingroup ge
@@ -394,9 +403,7 @@ class DavinciModel {
/// ///
uint32_t GetDeviceId() const { return device_id_; } uint32_t GetDeviceId() const { return device_id_; }


bool NeedDestroyAicpuKernel() const { return need_destroy_aicpu_kernel_; }

Status UpdateSessionId(uint64_t session_id);
GeModelPtr GetGeModel() { return ge_model_; }


const RuntimeParam &GetRuntimeParam() { return runtime_param_; } const RuntimeParam &GetRuntimeParam() { return runtime_param_; }


@@ -456,19 +463,6 @@ class DavinciModel {
void *cur_args = static_cast<char *>(args_) + offset; void *cur_args = static_cast<char *>(args_) + offset;
return cur_args; return cur_args;
} }
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
void *cur_addr = static_cast<char *>(fixed_addrs_) + offset;
return cur_addr;
}

uint32_t GetFixedAddrOutputIndex(string tensor_name) {
if (tensor_name_to_peer_output_index_.find(tensor_name) != tensor_name_to_peer_output_index_.end()) {
return tensor_name_to_peer_output_index_[tensor_name];
}
return UINT32_MAX;
}
void SetKnownNode(bool known_node) { known_node_ = known_node; } void SetKnownNode(bool known_node) { known_node_ = known_node; }
bool IsKnownNode() { return known_node_; } bool IsKnownNode() { return known_node_; }
Status MallocKnownArgs(); Status MallocKnownArgs();
@@ -483,9 +477,6 @@ class DavinciModel {
// om file name // om file name
void SetOmName(string om_name) { om_name_ = om_name; } void SetOmName(string om_name) { om_name_ = om_name; }


void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }

private: private:
// memory address of weights // memory address of weights
uint8_t *weights_mem_base_; uint8_t *weights_mem_base_;
@@ -502,6 +493,8 @@ class DavinciModel {
struct timeInfo time_info_; struct timeInfo time_info_;
int32_t dataInputTid; int32_t dataInputTid;


void InitZeroCopyUtil(bool is_dynamic_batch, bool &input_zero_copy, bool &output_zero_copy);

/// ///
/// @ingroup ge /// @ingroup ge
/// @brief Save Batch label Info. /// @brief Save Batch label Info.
@@ -537,13 +530,6 @@ class DavinciModel {
/// ///
bool CheckInputAndModelSize(const int64_t &input_size, const int64_t &op_size, bool is_dynamic); bool CheckInputAndModelSize(const int64_t &input_size, const int64_t &op_size, bool is_dynamic);


///
/// @ingroup ge
/// @brief Set copy only for No task feed NetOutput address.
/// @return None.
///
void SetCopyOnlyOutput();

/// ///
/// @ingroup ge /// @ingroup ge
/// @brief Copy Input/Output to model for direct use. /// @brief Copy Input/Output to model for direct use.
@@ -569,10 +555,14 @@ class DavinciModel {


Status CopyInputData(const InputData &input_data, bool device_data = false); Status CopyInputData(const InputData &input_data, bool device_data = false);


Status CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind);
Status CopyOutputData(uint32_t data_id, OutputData &output_data);

Status CopyOutputDataToUser(OpDescPtr &op_desc, std::vector<DataBuffer> &blobs, uint32_t &data_index);


Status SyncVarData(); Status SyncVarData();


Status SyncDataAndDump();

Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize);


void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);
@@ -599,12 +589,7 @@ class DavinciModel {


bool IsAicpuKernelConnectSpecifiedLayer(); bool IsAicpuKernelConnectSpecifiedLayer();


///
/// @ingroup ge
/// @brief Reduce memory usage after task sink.
/// @return: void
///
void Shrink();
Status MarkSpecifiedAicpuKernel();


/// ///
/// @ingroup ge /// @ingroup ge
@@ -740,9 +725,10 @@ class DavinciModel {
/// ///
/// @ingroup ge /// @ingroup ge
/// @brief definiteness queue schedule, active original model stream. /// @brief definiteness queue schedule, active original model stream.
/// @param [in] streams: streams will active by S0.
/// @return: 0 for success / others for fail /// @return: 0 for success / others for fail
/// ///
Status CpuActiveStream();
Status CpuActiveStream(const std::vector<rtStream_t> &stream_list);


/// ///
/// @ingroup ge /// @ingroup ge
@@ -760,9 +746,6 @@ class DavinciModel {
/// ///
Status CpuModelRepeat(); Status CpuModelRepeat();


Status InitEntryTask();
Status AddHeadStream();

/// ///
/// @ingroup ge /// @ingroup ge
/// @brief set ts device. /// @brief set ts device.
@@ -770,10 +753,6 @@ class DavinciModel {
/// ///
Status SetTSDevice(); Status SetTSDevice();


Status OpDebugRegister();

void OpDebugUnRegister();

void CheckHasHcomOp(); void CheckHasHcomOp();


Status DoTaskSink(); Status DoTaskSink();
@@ -781,17 +760,17 @@ class DavinciModel {
void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);


Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);
Status CopyVarData(ComputeGraphPtr &graph);


// get desc info of graph for profiling // get desc info of graph for profiling
Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info);
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &compute_graph_desc_info);


void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);
void SetDataDumperArgs();


Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs); std::vector<ge::OutputTensorInfo> &outputs);


void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);
void GetFixedAddrAttr(const OpDescPtr &op_desc);


bool is_model_has_inited_; bool is_model_has_inited_;
uint32_t model_id_; uint32_t model_id_;
@@ -804,9 +783,6 @@ class DavinciModel {
uint32_t version_; uint32_t version_;
GeModelPtr ge_model_; GeModelPtr ge_model_;


bool need_destroy_aicpu_kernel_{false};
vector<std::string> out_node_name_;

map<uint32_t, OpDescPtr> op_list_; map<uint32_t, OpDescPtr> op_list_;


// data op_desc // data op_desc
@@ -867,11 +843,6 @@ class DavinciModel {


bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_.


bool is_pure_head_stream_{false};
rtStream_t rt_head_stream_{nullptr};
rtStream_t rt_entry_stream_{nullptr};
rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED};

// ACL queue schedule, save queue ids for Init. // ACL queue schedule, save queue ids for Init.
std::vector<TaskInfoPtr> cpu_task_list_; std::vector<TaskInfoPtr> cpu_task_list_;
std::vector<uint32_t> input_queue_ids_; // input queue ids created by caller. std::vector<uint32_t> input_queue_ids_; // input queue ids created by caller.
@@ -893,6 +864,8 @@ class DavinciModel {
std::vector<rtStream_t> active_stream_list_; std::vector<rtStream_t> active_stream_list_;
std::set<uint32_t> active_stream_indication_; std::set<uint32_t> active_stream_indication_;


std::shared_ptr<domi::ModelTaskDef> model_task_def_;
std::set<uint32_t> aicpu_streams_;
std::set<uint32_t> hcom_streams_; std::set<uint32_t> hcom_streams_;
RuntimeParam runtime_param_; RuntimeParam runtime_param_;


@@ -904,39 +877,22 @@ class DavinciModel {
// for profiling task and graph info // for profiling task and graph info
std::map<uint32_t, std::string> op_name_map_; std::map<uint32_t, std::string> op_name_map_;
std::vector<TaskDescInfo> task_desc_info_; std::vector<TaskDescInfo> task_desc_info_;
ComputeGraphPtr compute_graph_;


int64_t maxDumpOpNum_; int64_t maxDumpOpNum_;
// for data dump // for data dump
DataDumper data_dumper_; DataDumper data_dumper_;
uint64_t iterator_count_; uint64_t iterator_count_;
bool is_l1_fusion_enable_; bool is_l1_fusion_enable_;
std::map<OpDescPtr, void *> saved_task_addrs_;


bool known_node_ = false; bool known_node_ = false;
uint32_t total_args_size_ = 0; uint32_t total_args_size_ = 0;
void *args_ = nullptr; void *args_ = nullptr;
void *args_host_ = nullptr; void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_; std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_; std::map<const void *, void *> knonw_output_data_info_;


vector<vector<int64_t>> batch_info_;

vector<uint64_t> batch_size_; vector<uint64_t> batch_size_;
// key: input tensor name, generally rts op;
// value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op
std::map<string, int64_t> tensor_name_to_fixed_addr_size_;

// key: input tensor name, generally rts op; value: the peer output anchor of the peer op
std::map<string, int64_t> tensor_name_to_peer_output_index_;
// if model is first execute
bool is_first_execute_;
// for op debug
std::mutex debug_reg_mutex_;
bool is_op_debug_reg_ = false;
void *op_debug_addr_ = nullptr;
void *p2p_debug_addr_ = nullptr;
bool is_new_model_desc_{false}; bool is_new_model_desc_{false};
}; };
} // namespace ge } // namespace ge


+ 44
- 38
src/ge/graph/load/new_model_manager/model_manager.cc View File

@@ -22,9 +22,8 @@
#include "common/profiling/profiling_manager.h" #include "common/profiling/profiling_manager.h"
#include "common/properties_manager.h" #include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
#include "framework/common/util.h"
#include "graph/load/new_model_manager/davinci_model.h" #include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/davinci_model_parser.h" #include "graph/load/new_model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h" #include "model/ge_root_model.h"
@@ -34,10 +33,9 @@ thread_local uint32_t device_count = 0;
namespace { namespace {
const int kCmdParSize = 2; const int kCmdParSize = 2;
const int kDumpCmdPairSize = 2; const int kDumpCmdPairSize = 2;
const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel";
} // namespace } // namespace


DumpProperties ModelManager::dump_properties_;

std::shared_ptr<ModelManager> ModelManager::GetInstance() { std::shared_ptr<ModelManager> ModelManager::GetInstance() {
static const std::shared_ptr<ModelManager> instance_ptr = static const std::shared_ptr<ModelManager> instance_ptr =
shared_ptr<ModelManager>(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); shared_ptr<ModelManager>(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr);
@@ -274,10 +272,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
davinci_model->SetId(model_id); davinci_model->SetId(model_id);
davinci_model->SetDeviceId(GetContext().DeviceId()); davinci_model->SetDeviceId(GetContext().DeviceId());


const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(GetContext().SessionId());
davinci_model->SetDumpProperties(dump_properties);
dump_properties_ = dump_properties;

auto root_graph = ge_root_model->GetRootGraph(); auto root_graph = ge_root_model->GetRootGraph();
GE_CHECK_NOTNULL(root_graph); GE_CHECK_NOTNULL(root_graph);
string root_model_name = root_graph->GetName(); string root_model_name = root_graph->GetName();
@@ -302,6 +296,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END); davinci_model->SetProfileTime(MODEL_LOAD_END);
if (davinci_model->SinkModelProfile() != SUCCESS) {
GELOGW("Sink model profile failed.");
}
} }
} while (0); } while (0);


@@ -614,10 +611,10 @@ Status ModelManager::HandleDumpCommand(const Command &command) {
GELOGE(PARAM_INVALID, "parser dump model failed"); GELOGE(PARAM_INVALID, "parser dump model failed");
return FAILED; return FAILED;
} }
GELOGI("dump model = %s.", dump_model.c_str());
GELOGI("dump status = %s.", dump_model.c_str());


if (dump_status == "off" || dump_status == "OFF") { if (dump_status == "off" || dump_status == "OFF") {
dump_properties_.DeletePropertyValue(dump_model);
PropertiesManager::Instance().DeleteDumpPropertyValue(dump_model);
return SUCCESS; return SUCCESS;
} }


@@ -634,10 +631,9 @@ Status ModelManager::HandleDumpCommand(const Command &command) {
return FAILED; return FAILED;
} }
if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') {
dump_path = dump_path + "/";
dump_path = dump_path + "/" + CurrentTimeInStr() + "/";
} }
dump_path = dump_path + CurrentTimeInStr() + "/";
GELOGI("dump path = %s.", dump_path.c_str());
GELOGI("dump status = %s.", dump_path.c_str());


ret = ParserPara(command, DUMP_MODE, dump_mode); ret = ParserPara(command, DUMP_MODE, dump_mode);
if (ret != SUCCESS) { if (ret != SUCCESS) {
@@ -646,10 +642,20 @@ Status ModelManager::HandleDumpCommand(const Command &command) {
} }
GELOGI("dump mode = %s", dump_mode.c_str()); GELOGI("dump mode = %s", dump_mode.c_str());


dump_properties_.AddPropertyValue(dump_model, dump_layers);
dump_properties_.SetDumpPath(dump_path);
dump_properties_.SetDumpMode(dump_mode);
auto iter_dump_mode = std::find(command.cmd_params.begin(), command.cmd_params.end(), DUMP_MODE);
if (iter_dump_mode != command.cmd_params.end()) {
++iter_dump_mode;
if (iter_dump_mode == command.cmd_params.end()) {
GELOGE(PARAM_INVALID, "Invalid access.");
return PARAM_INVALID;
}
dump_mode = *iter_dump_mode;
GELOGI("dump mode = %s", dump_mode.c_str());
}


PropertiesManager::Instance().AddDumpPropertyValue(dump_model, dump_layers);
PropertiesManager::Instance().SetDumpOutputPath(dump_path);
PropertiesManager::Instance().SetDumpMode(dump_mode);
return SUCCESS; return SUCCESS;
} }


@@ -765,6 +771,17 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {
return SUCCESS; return SUCCESS;
} }


Status ModelManager::UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id) {
GeModelPtr ge_model_current = davinci_model->GetGeModel();
GE_CHECK_NOTNULL(ge_model_current);
if (!ge::AttrUtils::SetInt(ge_model_current, ge::MODEL_ATTR_SESSION_ID, static_cast<int64_t>(session_id))) {
GELOGW("Set attr[%s] failed in updating session_id.", MODEL_ATTR_SESSION_ID.c_str());
}

GELOGD("Update session id: %lu.", session_id);
return SUCCESS;
}

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID,
@@ -807,7 +824,6 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
} }
davinci_model->SetDeviceId(device_id); davinci_model->SetDeviceId(device_id);
davinci_model->SetOmName(model.om_name); davinci_model->SetOmName(model.om_name);
davinci_model->SetDumpProperties(dump_properties_);


/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail.
/// These session_ids come from the same model, so the values of session_id are the same. /// These session_ids come from the same model, so the values of session_id are the same.
@@ -815,7 +831,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
uint64_t new_session_id; uint64_t new_session_id;
ret = GenSessionId(new_session_id); ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed.");
ret = davinci_model->UpdateSessionId(new_session_id);
ret = UpdateSessionId(davinci_model, new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed.");


ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size); ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size);
@@ -830,6 +846,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END); davinci_model->SetProfileTime(MODEL_LOAD_END);
if (davinci_model->SinkModelProfile() != SUCCESS) {
GELOGW("Sink model profile failed.");
}
} }


GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++); GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++);
@@ -879,7 +898,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
uint64_t new_session_id; uint64_t new_session_id;
ret = GenSessionId(new_session_id); ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
ret = davinci_model->UpdateSessionId(new_session_id);
ret = UpdateSessionId(davinci_model, new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");


GenModelId(&model_id); GenModelId(&model_id);
@@ -890,8 +909,6 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
return ret; return ret;
} }


davinci_model->SetDumpProperties(dump_properties_);

ret = davinci_model->Init(); ret = davinci_model->Init();
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "init model failed."); GELOGE(ret, "init model failed.");
@@ -918,8 +935,12 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id);


if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");
GeModelPtr ge_model_current = davinci_model->GetGeModel();
bool need_destroy_aicpu_kernel = false;
bool result = ge::AttrUtils::GetBool(ge_model_current, kNeedDestroySpecifiedAicpuKernel, need_destroy_aicpu_kernel);
if (result && need_destroy_aicpu_kernel) {
GELOGI("Get attr %s successfully, start to destroy specified aicpu kernel.", kNeedDestroySpecifiedAicpuKernel);

// Zero copy is enabled by default, no need to judge. // Zero copy is enabled by default, no need to judge.
uint64_t session_id_davinci = davinci_model->GetSessionId(); uint64_t session_id_davinci = davinci_model->GetSessionId();
uint32_t model_id_davinci = davinci_model->GetModelId(); uint32_t model_id_davinci = davinci_model->GetModelId();
@@ -1029,19 +1050,4 @@ Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index
return davinci_model->GetAllAippInputOutputDims(index, input_dims, output_dims); return davinci_model->GetAllAippInputOutputDims(index, input_dims, output_dims);
} }


bool ModelManager::IsDynamicShape(uint32_t model_id) {
auto model = GetHybridModel(model_id);
return model != nullptr;
}

ge::Status ModelManager::SyncExecuteModel(uint32_t model_id, const vector<GeTensor> &inputs,
vector<GeTensor> &outputs) {
auto model = GetHybridModel(model_id);
if (model == nullptr) {
GELOGE(FAILED, "Hybrid model not found. model id = %u.", model_id);
return FAILED;
}

return model->Execute(inputs, outputs);
}
} // namespace ge } // namespace ge

+ 1
- 7
src/ge/graph/load/new_model_manager/model_manager.h View File

@@ -31,7 +31,6 @@
#include "common/ge_types.h" #include "common/ge_types.h"
#include "common/helper/model_helper.h" #include "common/helper/model_helper.h"
#include "common/helper/om_file_helper.h" #include "common/helper/om_file_helper.h"
#include "common/properties_manager.h"
#include "common/types.h" #include "common/types.h"
#include "ge/ge_api_types.h" #include "ge/ge_api_types.h"
#include "graph/ge_context.h" #include "graph/ge_context.h"
@@ -142,8 +141,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
OutputData &output_data); OutputData &output_data);


ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs);

/// ///
/// @ingroup domi_ome /// @ingroup domi_ome
/// @brief model stop /// @brief model stop
@@ -252,8 +249,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims); std::vector<InputOutputDims> &output_dims);


bool IsDynamicShape(uint32_t model_id);

private: private:
/// ///
/// @ingroup domi_ome /// @ingroup domi_ome
@@ -281,6 +276,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
ge::Status DeleteModel(uint32_t id); ge::Status DeleteModel(uint32_t id);


void GenModelId(uint32_t *id); void GenModelId(uint32_t *id);
ge::Status UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id);


std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_; std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_;
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
@@ -291,8 +287,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::mutex session_id_create_mutex_; std::mutex session_id_create_mutex_;
uint64_t session_id_bias_; uint64_t session_id_bias_;
std::set<uint64_t> sess_ids_; std::set<uint64_t> sess_ids_;

static DumpProperties dump_properties_;
}; };
} // namespace ge } // namespace ge




+ 128
- 109
src/ge/graph/load/new_model_manager/model_utils.cc View File

@@ -31,7 +31,7 @@


namespace ge { namespace ge {
/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get input size. /// @brief Get input size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
@@ -43,26 +43,22 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) {


const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); const vector<bool> v_is_input_const = op_desc->GetIsInputConst();
for (size_t i = 0; i < inputs_size; ++i) { for (size_t i = 0; i < inputs_size; ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

int64_t tensor_size = 0;
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) {
// TBE: add weights size to input // TBE: add weights size to input
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size));
GeTensorDesc tensor_desc = op_desc->GetInputDesc(i);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
if (tensor_size) { if (tensor_size) {
v_input_size.push_back(tensor_size); v_input_size.push_back(tensor_size);
} }
continue; continue;
} }


int64_t tensor_size = 0;
GE_IF_BOOL_EXEC( GE_IF_BOOL_EXEC(
TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS,
TensorUtils::GetSize(op_desc->GetInputDesc(i), tensor_size) != GRAPH_SUCCESS,
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i);
continue);
continue;);


v_input_size.push_back(tensor_size); v_input_size.push_back(tensor_size);
} }
@@ -71,7 +67,7 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) {
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get output size. /// @brief Get output size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
@@ -86,17 +82,11 @@ vector<int64_t> ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) {
return v_output_size;); return v_output_size;);


for (size_t i = 0; i < outputs_size; ++i) { for (size_t i = 0; i < outputs_size; ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

int64_t tensor_size = 0; int64_t tensor_size = 0;
GE_IF_BOOL_EXEC( GE_IF_BOOL_EXEC(
TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS,
TensorUtils::GetSize(op_desc->GetOutputDesc(i), tensor_size) != GRAPH_SUCCESS,
GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i);
continue);
continue;);


v_output_size.push_back(tensor_size); v_output_size.push_back(tensor_size);
} }
@@ -105,7 +95,7 @@ vector<int64_t> ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) {
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get workspace size. /// @brief Get workspace size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
@@ -128,7 +118,7 @@ vector<int64_t> ModelUtils::GetWorkspaceSize(ConstOpDescPtr op_desc) {
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get weight size. /// @brief Get weight size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
@@ -152,14 +142,8 @@ vector<int64_t> ModelUtils::GetWeightSize(ConstOpDescPtr op_desc) {
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); const vector<bool> v_is_input_const = op_desc->GetIsInputConst();
for (size_t i = 0; i < inputs_size; ++i) { for (size_t i = 0; i < inputs_size; ++i) {
if ((i < v_is_input_const.size()) && v_is_input_const[i]) { if ((i < v_is_input_const.size()) && v_is_input_const[i]) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

int64_t tensor_size = 0; int64_t tensor_size = 0;
(void)TensorUtils::GetSize(*tensor_desc, tensor_size);
(void)TensorUtils::GetSize(op_desc->GetInputDesc(i), tensor_size);
v_weight_size.push_back(tensor_size); v_weight_size.push_back(tensor_size);
} }
} }
@@ -168,7 +152,7 @@ vector<int64_t> ModelUtils::GetWeightSize(ConstOpDescPtr op_desc) {
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get weights. /// @brief Get weights.
/// @return vector<ConstGeTensorPtr> /// @return vector<ConstGeTensorPtr>
/// ///
@@ -192,14 +176,9 @@ vector<ConstGeTensorPtr> ModelUtils::GetWeights(ConstOpDescPtr op_desc) {
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); const vector<bool> v_is_input_const = op_desc->GetIsInputConst();
for (size_t i = 0; i < inputs_size; ++i) { for (size_t i = 0; i < inputs_size; ++i) {
if ((i < v_is_input_const.size()) && v_is_input_const[i]) { if ((i < v_is_input_const.size()) && v_is_input_const[i]) {
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

ConstGeTensorPtr weight = nullptr; ConstGeTensorPtr weight = nullptr;
if (AttrUtils::GetTensor(*tensor_desc, ATTR_NAME_WEIGHTS, weight)) {
GeTensorDesc tensor_desc = op_desc->GetInputDesc(i);
if (AttrUtils::GetTensor(tensor_desc, ATTR_NAME_WEIGHTS, weight)) {
v_weights.push_back(weight); v_weights.push_back(weight);
} }
} }
@@ -209,7 +188,7 @@ vector<ConstGeTensorPtr> ModelUtils::GetWeights(ConstOpDescPtr op_desc) {
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get AiCpuOp Input descriptor. /// @brief Get AiCpuOp Input descriptor.
/// @return vector<::tagCcAICPUTensor> /// @return vector<::tagCcAICPUTensor>
/// ///
@@ -226,25 +205,20 @@ vector<::tagCcAICPUTensor> ModelUtils::GetInputDescs(ConstOpDescPtr op_desc) {
continue; continue;
} }


const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

uint32_t dim_cnt = 0; uint32_t dim_cnt = 0;
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(*tensor_desc, dim_cnt) == GRAPH_SUCCESS, continue,
const auto &descriptor = op_desc->GetInputDesc(i);
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(descriptor, dim_cnt) == GRAPH_SUCCESS, continue,
"Get dim_cnt failed"); "Get dim_cnt failed");


opTensor_t tmp; opTensor_t tmp;
uint32_t tmp_fmt = tensor_desc->GetFormat();
uint32_t tmp_fmt = descriptor.GetFormat();
tmp.format = tagOpTensorFormat(tmp_fmt); tmp.format = tagOpTensorFormat(tmp_fmt);
tmp.dim_cnt = static_cast<int32_t>(dim_cnt); tmp.dim_cnt = static_cast<int32_t>(dim_cnt);
uint32_t tmp_type = tensor_desc->GetDataType();
uint32_t tmp_type = descriptor.GetDataType();
tmp.data_type = tagOpDataType(tmp_type); tmp.data_type = tagOpDataType(tmp_type);


for (int32_t j = 0; j < 4; j++) { // 4 dims for (int32_t j = 0; j < 4; j++) { // 4 dims
tmp.dim[j] = (j < tmp.dim_cnt ? tensor_desc->GetShape().GetDim(j) : 1);
tmp.dim[j] = (j < tmp.dim_cnt ? descriptor.GetShape().GetDim(j) : 1);
} }


v_input_descs.push_back(tmp); v_input_descs.push_back(tmp);
@@ -254,7 +228,7 @@ vector<::tagCcAICPUTensor> ModelUtils::GetInputDescs(ConstOpDescPtr op_desc) {
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get AiCpuOp Output descriptor. /// @brief Get AiCpuOp Output descriptor.
/// @return vector<::tagCcAICPUTensor> /// @return vector<::tagCcAICPUTensor>
/// ///
@@ -266,25 +240,20 @@ vector<::tagCcAICPUTensor> ModelUtils::GetOutputDescs(ConstOpDescPtr op_desc) {
// init op output opTensor_t struct // init op output opTensor_t struct
const size_t output_num = op_desc->GetOutputsSize(); const size_t output_num = op_desc->GetOutputsSize();
for (size_t i = 0; i < output_num; ++i) { for (size_t i = 0; i < output_num; ++i) {
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

uint32_t dim_cnt = 0; uint32_t dim_cnt = 0;
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(*tensor_desc, dim_cnt) == GRAPH_SUCCESS, continue,
const auto &descriptor = op_desc->GetOutputDesc(i);
GE_CHK_BOOL_EXEC_WARN(TensorUtils::GetRealDimCnt(descriptor, dim_cnt) == GRAPH_SUCCESS, continue,
"Get dim_cnt failed"); "Get dim_cnt failed");


opTensor_t tmp; opTensor_t tmp;
uint32_t tmp_fmt = tensor_desc->GetFormat();
uint32_t tmp_fmt = descriptor.GetFormat();
tmp.format = tagOpTensorFormat(tmp_fmt); tmp.format = tagOpTensorFormat(tmp_fmt);
tmp.dim_cnt = static_cast<int32_t>(dim_cnt); tmp.dim_cnt = static_cast<int32_t>(dim_cnt);
uint32_t tmp_type = tensor_desc->GetDataType();
uint32_t tmp_type = descriptor.GetDataType();
tmp.data_type = tagOpDataType(tmp_type); tmp.data_type = tagOpDataType(tmp_type);


for (int32_t j = 0; j < 4; j++) { // 4 dims for (int32_t j = 0; j < 4; j++) { // 4 dims
tmp.dim[j] = (j < tmp.dim_cnt ? tensor_desc->GetShape().GetDim(j) : 1);
tmp.dim[j] = (j < tmp.dim_cnt ? descriptor.GetShape().GetDim(j) : 1);
} }


v_output_descs.push_back(tmp); v_output_descs.push_back(tmp);
@@ -294,14 +263,44 @@ vector<::tagCcAICPUTensor> ModelUtils::GetOutputDescs(ConstOpDescPtr op_desc) {
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get input data address. /// @brief Get input data address.
/// @return vector<void*> /// @return vector<void*>
/// ///
vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc) {
vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc,
bool need_convert) {
vector<void *> v_input_data_addr; // init as:buf_base + op_def_->input(i)); vector<void *> v_input_data_addr; // init as:buf_base + op_def_->input(i));
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr); GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr);
uint64_t session_id = model_param.session_id; uint64_t session_id = model_param.session_id;
uint8_t *mem_base = model_param.mem_base;
uint8_t *var_base = model_param.var_base;
uint8_t *weight_base = model_param.weight_base;
const uint64_t logic_mem_base = 0;
uint64_t logic_weight_base = 0;
uint64_t logic_var_base = model_param.logic_var_base;
uint64_t mem_size = model_param.mem_size;
uint64_t weight_size = model_param.weight_size;
uint64_t var_size = model_param.var_size;

if (need_convert) {
Status status = ConvertVirtualAddressToPhysical(mem_base, mem_size, mem_base);
if (status != SUCCESS) {
GELOGE(RT_FAILED, "Convert virtual address to physical for mem_base failed.");
return v_input_data_addr;
}

status = ConvertVirtualAddressToPhysical(weight_base, weight_size, weight_base);
if (status != SUCCESS) {
GELOGE(RT_FAILED, "Convert virtual address to physical for weight_base failed.");
return v_input_data_addr;
}

status = ConvertVirtualAddressToPhysical(var_base, var_size, var_base);
if (status != SUCCESS) {
GELOGE(RT_FAILED, "Convert virtual address to physical for var_base failed.");
return v_input_data_addr;
}
}


const size_t inputs_size = op_desc->GetInputsSize(); const size_t inputs_size = op_desc->GetInputsSize();
const vector<int64_t> v_input_offset = op_desc->GetInputOffset(); const vector<int64_t> v_input_offset = op_desc->GetInputOffset();
@@ -320,18 +319,13 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
for (size_t i = 0; i < inputs_size; ++i) { for (size_t i = 0; i < inputs_size; ++i) {
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) {
// TBE: add weights address to input // TBE: add weights address to input
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i);
if (tensor_desc == nullptr) {
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}

GeTensorDesc tensor_desc = op_desc->GetInputDesc(i);
int64_t tensor_size = 0; int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size));
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
if (tensor_size) { if (tensor_size) {
int64_t data_offset = 0; int64_t data_offset = 0;
GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset));
uint8_t *weight_addr = model_param.weight_base + data_offset;
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, data_offset));
uint8_t *weight_addr = static_cast<uint8_t *>(weight_base + data_offset - logic_weight_base);
v_input_data_addr.push_back(weight_addr); v_input_data_addr.push_back(weight_addr);
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id,
op_desc->GetName().c_str(), i, weight_addr); op_desc->GetName().c_str(), i, weight_addr);
@@ -346,13 +340,17 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co


int64_t input_offset = v_input_offset[non_const_index]; int64_t input_offset = v_input_offset[non_const_index];
non_const_index++; non_const_index++;
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset),
uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base;
GE_IF_BOOL_EXEC(var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset),
uint8_t *variable_addr = var_base + input_offset - logic_var_base;
v_input_data_addr.push_back(variable_addr); v_input_data_addr.push_back(variable_addr);
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue);
continue;);


bool input_tensor = false;
GE_IF_BOOL_EXEC(TensorUtils::GetInputTensor(op_desc->GetOutputDesc(i), input_tensor) != GRAPH_SUCCESS,
GELOGW("get size from TensorDesc failed, op: %s, input index: %zu", op_desc->GetName().c_str(), i);
continue;);
// feature maps // feature maps
uint8_t *mem_addr = nullptr; uint8_t *mem_addr = nullptr;
// fusion // fusion
@@ -360,7 +358,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset)); mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset));
v_input_data_addr.push_back(mem_addr); v_input_data_addr.push_back(mem_addr);
} else { } else {
mem_addr = model_param.mem_base + input_offset;
mem_addr = static_cast<uint8_t *>(mem_base + input_offset - logic_mem_base);
v_input_data_addr.push_back(mem_addr); v_input_data_addr.push_back(mem_addr);
} }
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[F] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, GELOGI("[IMAS]GetInputDataAddrs graph_%u type[F] name[%s] input[%zu] memaddr[%p]", model_param.graph_id,
@@ -371,20 +369,41 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get output data address. /// @brief Get output data address.
/// @return vector<void*> /// @return vector<void*>
/// ///
vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc) {
vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc,
bool need_convert) {
vector<void *> v_output_data_addr; // init as:buf_base + op_def_->output(i) vector<void *> v_output_data_addr; // init as:buf_base + op_def_->output(i)
GE_CHECK_NOTNULL_EXEC(op_desc, return v_output_data_addr); GE_CHECK_NOTNULL_EXEC(op_desc, return v_output_data_addr);
uint64_t session_id = model_param.session_id; uint64_t session_id = model_param.session_id;
uint8_t *mem_base = model_param.mem_base;
uint8_t *var_base = model_param.var_base;
const uint64_t logic_mem_base = 0;
uint64_t logic_var_base = model_param.logic_var_base;
uint64_t mem_size = model_param.mem_size;
uint64_t var_size = model_param.var_size;

if (need_convert) {
Status status = ConvertVirtualAddressToPhysical(mem_base, mem_size, mem_base);
if (status != SUCCESS) {
GELOGE(RT_FAILED, "Convert virtual address to physical for mem_base failed.");
return v_output_data_addr;
}

status = ConvertVirtualAddressToPhysical(var_base, var_size, var_base);
if (status != SUCCESS) {
GELOGE(RT_FAILED, "Convert virtual address to physical for var_base failed.");
return v_output_data_addr;
}
}


const size_t outputs_size = op_desc->GetOutputsSize(); const size_t outputs_size = op_desc->GetOutputsSize();
const vector<int64_t> v_output_offset = op_desc->GetOutputOffset(); const vector<int64_t> v_output_offset = op_desc->GetOutputOffset();
GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size, GE_IF_BOOL_EXEC(v_output_offset.size() != outputs_size,
GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size); GELOGW("Output param invalid: output_offset=%zu, outputs=%zu.", v_output_offset.size(), outputs_size);
return v_output_data_addr);
return v_output_data_addr;);
vector<int64_t> v_memory_type; vector<int64_t> v_memory_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type); bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type);
if (has_mem_type_attr && (v_memory_type.size() != outputs_size)) { if (has_mem_type_attr && (v_memory_type.size() != outputs_size)) {
@@ -394,12 +413,12 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
return v_output_data_addr; return v_output_data_addr;
} }
for (size_t i = 0; i < outputs_size; ++i) { for (size_t i = 0; i < outputs_size; ++i) {
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]),
uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base;
GE_IF_BOOL_EXEC(var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]),
uint8_t *variable_addr = static_cast<uint8_t *>(var_base + v_output_offset[i] - logic_var_base);
v_output_data_addr.push_back(variable_addr); v_output_data_addr.push_back(variable_addr);
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue);
continue;);
// feature maps // feature maps
uint8_t *mem_addr = nullptr; uint8_t *mem_addr = nullptr;
// fusion // fusion
@@ -407,7 +426,7 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i])); mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i]));
v_output_data_addr.push_back(mem_addr); v_output_data_addr.push_back(mem_addr);
} else { } else {
mem_addr = static_cast<uint8_t *>(model_param.mem_base + v_output_offset[i]);
mem_addr = static_cast<uint8_t *>(mem_base + v_output_offset[i] - logic_mem_base);
v_output_data_addr.push_back(mem_addr); v_output_data_addr.push_back(mem_addr);
} }
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[F] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[F] name[%s] output[%zu] memaddr[%p]", model_param.graph_id,
@@ -417,13 +436,24 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
} }


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get workspace data address. /// @brief Get workspace data address.
/// @return vector<void*> /// @return vector<void*>
/// ///
vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc) {
vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc,
bool need_convert) {
vector<void *> v_workspace_data_addr; vector<void *> v_workspace_data_addr;
GE_CHECK_NOTNULL_EXEC(op_desc, return v_workspace_data_addr); GE_CHECK_NOTNULL_EXEC(op_desc, return v_workspace_data_addr);
uint8_t *mem_base = model_param.mem_base;
uint64_t mem_size = model_param.mem_size;

if (need_convert) {
Status status = ConvertVirtualAddressToPhysical(mem_base, mem_size, mem_base);
if (status != SUCCESS) {
GELOGE(RT_FAILED, "Convert virtual address to physical for mem_base failed.");
return v_workspace_data_addr;
}
}


const vector<int64_t> v_workspace_offset = op_desc->GetWorkspace(); const vector<int64_t> v_workspace_offset = op_desc->GetWorkspace();
const vector<int64_t> v_workspace_bytes = op_desc->GetWorkspaceBytes(); const vector<int64_t> v_workspace_bytes = op_desc->GetWorkspaceBytes();
@@ -436,13 +466,13 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type);
for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { for (size_t i = 0; i < v_workspace_bytes.size(); ++i) {
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) {
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i])));
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(v_workspace_offset[i]));
GELOGI("Fusion: op: %s, GetWorkspaceDataAddrs mem_addr[workspace index %zu]:%p", op_desc->GetName().c_str(), i, GELOGI("Fusion: op: %s, GetWorkspaceDataAddrs mem_addr[workspace index %zu]:%p", op_desc->GetName().c_str(), i,
reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i])));
} else { } else {
int64_t workspace_offset = v_workspace_offset[i]; int64_t workspace_offset = v_workspace_offset[i];
int64_t workspace_bytes = v_workspace_bytes[i]; int64_t workspace_bytes = v_workspace_bytes[i];
uint8_t *mem_addr = workspace_bytes == 0 ? nullptr : model_param.mem_base + workspace_offset;
uint8_t *mem_addr = workspace_bytes == 0 ? nullptr : mem_base + workspace_offset;
v_workspace_data_addr.push_back(mem_addr); v_workspace_data_addr.push_back(mem_addr);
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]",
model_param.graph_id, op_desc->GetName().c_str(), i, workspace_offset, workspace_bytes, mem_addr); model_param.graph_id, op_desc->GetName().c_str(), i, workspace_offset, workspace_bytes, mem_addr);
@@ -452,32 +482,21 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param
return v_workspace_data_addr; return v_workspace_data_addr;
} }


///
/// @ingroup ge
/// @brief Get runtime memory address.
/// @return Status
///
Status ModelUtils::GetRtAddress(const RuntimeParam &param, uintptr_t logic_addr, uint8_t *&mem_addr) {
uint8_t *runtime_base_addr = nullptr;
if ((param.logic_mem_base <= logic_addr) && (logic_addr < param.logic_mem_base + param.mem_size)) {
runtime_base_addr = param.mem_base - param.logic_mem_base;
GELOGI("The logic addr:0x%lx is data address, base:0x%lx, size:%lu", logic_addr, param.logic_mem_base,
param.mem_size);
} else if ((param.logic_weight_base <= logic_addr) && (logic_addr < param.logic_weight_base + param.weight_size)) {
runtime_base_addr = param.weight_base - param.logic_weight_base;
GELOGI("The logic addr:0x%lx is weight address, base:0x%lx, size:%lu", logic_addr, param.logic_weight_base,
param.weight_size);
} else if ((param.logic_var_base <= logic_addr) && (logic_addr < param.logic_var_base + param.var_size)) {
runtime_base_addr = param.var_base - param.logic_var_base;
GELOGI("The logic addr:0x%lx is variable address, base:0x%lx, size:%lu", logic_addr, param.logic_var_base,
param.var_size);
} else if (logic_addr != 0) {
mem_addr = nullptr;
GELOGE(PARAM_INVALID, "The logic addr:0x%lx is abnormal", logic_addr);
return PARAM_INVALID;
Status ModelUtils::ConvertVirtualAddressToPhysical(uint8_t *virtual_address, uint64_t size,
uint8_t *&physical_address) {
// Indicates whether use physical address.
const char *use_physical_address = std::getenv("GE_USE_PHYSICAL_ADDRESS");
if (use_physical_address == nullptr || virtual_address == 0 || size == 0) {
return SUCCESS;
}

rtError_t ret = rtKernelConfigTransArg(virtual_address, size, 0, reinterpret_cast<void **>(&physical_address));
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtKernelConfigTransArg failed, ret: 0x%X", ret);
return RT_FAILED;
} }


mem_addr = runtime_base_addr + logic_addr;
GELOGD("virtual_address=%p, physical_address=%p", virtual_address, physical_address);
return SUCCESS; return SUCCESS;
} }
} // namespace ge } // namespace ge

+ 18
- 19
src/ge/graph/load/new_model_manager/model_utils.h View File

@@ -34,79 +34,78 @@ class ModelUtils {
~ModelUtils() = default; ~ModelUtils() = default;


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get input size. /// @brief Get input size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
static vector<int64_t> GetInputSize(ConstOpDescPtr op_desc); static vector<int64_t> GetInputSize(ConstOpDescPtr op_desc);


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get output size. /// @brief Get output size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
static vector<int64_t> GetOutputSize(ConstOpDescPtr op_desc); static vector<int64_t> GetOutputSize(ConstOpDescPtr op_desc);


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get workspace size. /// @brief Get workspace size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
static vector<int64_t> GetWorkspaceSize(ConstOpDescPtr op_desc); static vector<int64_t> GetWorkspaceSize(ConstOpDescPtr op_desc);


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get weight size. /// @brief Get weight size.
/// @return vector<uint32_t> /// @return vector<uint32_t>
/// ///
static vector<int64_t> GetWeightSize(ConstOpDescPtr op_desc); static vector<int64_t> GetWeightSize(ConstOpDescPtr op_desc);


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get weights. /// @brief Get weights.
/// @return vector<ConstGeTensorPtr> /// @return vector<ConstGeTensorPtr>
/// ///
static vector<ConstGeTensorPtr> GetWeights(ConstOpDescPtr op_desc); static vector<ConstGeTensorPtr> GetWeights(ConstOpDescPtr op_desc);


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get AiCpuOp Input descriptor. /// @brief Get AiCpuOp Input descriptor.
/// @return vector<::tagCcAICPUTensor> /// @return vector<::tagCcAICPUTensor>
/// ///
static vector<::tagCcAICPUTensor> GetInputDescs(ConstOpDescPtr op_desc); static vector<::tagCcAICPUTensor> GetInputDescs(ConstOpDescPtr op_desc);
/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get AiCpuOp Output descriptor. /// @brief Get AiCpuOp Output descriptor.
/// @return vector<::tagCcAICPUTensor> /// @return vector<::tagCcAICPUTensor>
/// ///
static vector<::tagCcAICPUTensor> GetOutputDescs(ConstOpDescPtr op_desc); static vector<::tagCcAICPUTensor> GetOutputDescs(ConstOpDescPtr op_desc);


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get input data address. /// @brief Get input data address.
/// @return vector<void*> /// @return vector<void*>
/// ///
static vector<void *> GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc);
static vector<void *> GetInputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc,
bool need_convert = true);
/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get output data address. /// @brief Get output data address.
/// @return vector<void*> /// @return vector<void*>
/// ///
static vector<void *> GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc);
static vector<void *> GetOutputDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc,
bool need_convert = true);


/// ///
/// @ingroup ge
/// @ingroup domi_ome
/// @brief Get workspace data address. /// @brief Get workspace data address.
/// @return vector<void*> /// @return vector<void*>
/// ///
static vector<void *> GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc);
static vector<void *> GetWorkspaceDataAddrs(const RuntimeParam &model_param, ConstOpDescPtr op_desc,
bool need_convert = true);


///
/// @ingroup ge
/// @brief Get memory runtime base.
/// @return Status
///
static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr);
static ge::Status ConvertVirtualAddressToPhysical(uint8_t *virtual_address, uint64_t size,
uint8_t *&physical_address);
}; };
} // namespace ge } // namespace ge




+ 2
- 1
src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc View File

@@ -45,7 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
Status EndGraphTaskInfo::Distribute() { Status EndGraphTaskInfo::Distribute() {
GELOGI("EndGraphTaskInfo Distribute Start."); GELOGI("EndGraphTaskInfo Distribute Start.");
GE_CHECK_NOTNULL(davinci_model_); GE_CHECK_NOTNULL(davinci_model_);
auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
@@ -80,4 +80,5 @@ Status EndGraphTaskInfo::Distribute() {
} }


REGISTER_TASK_INFO(RT_MODEL_TASK_MODEL_END_GRAPH, EndGraphTaskInfo); REGISTER_TASK_INFO(RT_MODEL_TASK_MODEL_END_GRAPH, EndGraphTaskInfo);

} // namespace ge } // namespace ge

+ 5
- 5
src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h View File

@@ -22,7 +22,7 @@
namespace ge { namespace ge {
class EndGraphTaskInfo : public TaskInfo { class EndGraphTaskInfo : public TaskInfo {
public: public:
EndGraphTaskInfo() {}
EndGraphTaskInfo() : model_(0) {}


~EndGraphTaskInfo() override { model_ = nullptr; } ~EndGraphTaskInfo() override { model_ = nullptr; }


@@ -35,10 +35,10 @@ class EndGraphTaskInfo : public TaskInfo {
uint32_t GetStreamId() override { return stream_id_; } uint32_t GetStreamId() override { return stream_id_; }


private: private:
rtModel_t model_{nullptr};
DavinciModel *davinci_model_{nullptr};
uint32_t task_id_{0};
uint32_t stream_id_{0};
rtModel_t model_;
DavinciModel *davinci_model_;
uint32_t task_id_;
uint32_t stream_id_;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_

+ 36
- 89
src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc View File

@@ -42,7 +42,6 @@ HcclTaskInfo::~HcclTaskInfo() {
davinci_model_ = nullptr; davinci_model_ = nullptr;
ops_kernel_store_ = nullptr; ops_kernel_store_ = nullptr;
max_node_of_hccl_stream_ = 0; max_node_of_hccl_stream_ = 0;
args_ = nullptr;
} }
Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("HcclTaskInfo Init Start."); GELOGI("HcclTaskInfo Init Start.");
@@ -61,61 +60,54 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m
GELOGI("HcclTaskInfo Init, op_index is: %u", op_index); GELOGI("HcclTaskInfo Init, op_index is: %u", op_index);


// Get HCCL op // Get HCCL op
op_desc_ = davinci_model->GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op_desc_);
OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op_desc);


// Create the kernel hccl infos // Create the kernel hccl infos
CreateKernelHcclInfo(op_desc_);
CreateKernelHcclInfo(op_desc);


// Initialize the hccl_type of all kernel hccl info // Initialize the hccl_type of all kernel hccl info
HcomOmeUtil::GetHcclType(task_def, kernel_hccl_infos_); HcomOmeUtil::GetHcclType(task_def, kernel_hccl_infos_);


// Only in Horovod scenario should get the inputName and GeShape // Only in Horovod scenario should get the inputName and GeShape
ret = HcomOmeUtil::GetHorovodInputs(op_desc_, kernel_hccl_infos_);
ret = HcomOmeUtil::GetHorovodInputs(op_desc, kernel_hccl_infos_);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(FAILED, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); GELOGE(FAILED, "davinci_model: GetHorovodInputs fail! domi error: %u", ret);
return FAILED; return FAILED;
} }
Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc_, kernel_hccl_infos_);
Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc, kernel_hccl_infos_);
if (dmrt != SUCCESS) { if (dmrt != SUCCESS) {
GELOGE(FAILED, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); GELOGE(FAILED, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt);
return FAILED; return FAILED;
} }
dmrt = HcomOmeUtil::GetHcclCount(op_desc_, kernel_hccl_infos_);
dmrt = HcomOmeUtil::GetHcclCount(op_desc, kernel_hccl_infos_);
if (dmrt != SUCCESS) { if (dmrt != SUCCESS) {
GELOGE(FAILED, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); GELOGE(FAILED, "davinci_model: GetHcomCount fail! domi error: %u", dmrt);
return FAILED; return FAILED;
} }
// Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId // Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId
dmrt = HcomOmeUtil::GetAllRootId(op_desc_, kernel_hccl_infos_);
dmrt = HcomOmeUtil::GetAllRootId(op_desc, kernel_hccl_infos_);
if (dmrt != SUCCESS) { if (dmrt != SUCCESS) {
GELOGE(FAILED, "davinci_model: Get rootId fail! domi error: %u", dmrt); GELOGE(FAILED, "davinci_model: Get rootId fail! domi error: %u", dmrt);
return FAILED; return FAILED;
} }

// GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl
ret = SetFollowStream(op_desc_, davinci_model);
if (ret != SUCCESS) {
GELOGE(ret, "SetStream Fail.");
return ret;
}

if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}

ret = SetAddrs(op_desc_, kernel_hccl_infos_);
ret = SetAddrs(op_desc, kernel_hccl_infos_);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Setaddrs Fail."); GELOGE(ret, "Setaddrs Fail.");
return ret; return ret;
} }
// GE's new process: hccl declares the need for Workspace size, and GE allocates Workspace // GE's new process: hccl declares the need for Workspace size, and GE allocates Workspace
ret = SetWorkspace(op_desc_, kernel_hccl_infos_);
ret = SetWorkspace(op_desc, kernel_hccl_infos_);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "SetWorkspace Fail."); GELOGE(ret, "SetWorkspace Fail.");
return ret; return ret;
} }
// GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl
ret = SetFollowStream(op_desc, davinci_model);
if (ret != SUCCESS) {
GELOGE(ret, "SetStream Fail.");
return ret;
}


GELOGI("HcclTaskInfo Init Success"); GELOGI("HcclTaskInfo Init Success");
return SUCCESS; return SUCCESS;
@@ -217,83 +209,40 @@ Status HcclTaskInfo::Distribute() {
GELOGI("HcclTaskInfo Distribute Success."); GELOGI("HcclTaskInfo Distribute Success.");
return SUCCESS; return SUCCESS;
} }

Status HcclTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GE_CHECK_NOTNULL(davinci_model);
auto hccl_def = task_def.kernel_hccl();
uint32_t op_index = hccl_def.op_index();
GELOGI("HcclTaskInfo Init, op_index is: %u", op_index);
// Get HCCL op
auto op_desc = davinci_model->GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op_desc);
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
// Only need the number of addr to allocate args memory
auto input_size = op_desc->GetInputsSize();
auto output_size = op_desc->GetOutputsSize();
auto workspace_size = op_desc->GetWorkspaceBytes().size();
uint32_t args_size = sizeof(void *) * (input_size + output_size + workspace_size);
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("Calculate hccl task args , args_size %u, args_offset %u", args_size, args_offset_);
return SUCCESS;
}

Status HcclTaskInfo::UpdateArgs() {
GELOGI("HcclTaskInfo::UpdateArgs in.");
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
input_data_addrs_ = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
output_data_addrs_ = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
workspace_data_addrs_ = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);

vector<void *> io_addrs;
io_addrs.insert(io_addrs.end(), input_data_addrs_.begin(), input_data_addrs_.end());
io_addrs.insert(io_addrs.end(), output_data_addrs_.begin(), output_data_addrs_.end());
io_addrs.insert(io_addrs.end(), workspace_data_addrs_.begin(), workspace_data_addrs_.end());

GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str());

GELOGI("HcclTaskInfo::UpdateArgs success.");
return SUCCESS;
}

Status HcclTaskInfo::SetAddrs(const std::shared_ptr<OpDesc> &op_desc, Status HcclTaskInfo::SetAddrs(const std::shared_ptr<OpDesc> &op_desc,
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) { std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) {
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
GE_CHK_STATUS_RET(HcomOmeUtil::CheckKernelHcclInfo(op_desc, kernel_hccl_infos),
"HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid.");
if (HcomOmeUtil::CheckKernelHcclInfo(op_desc, kernel_hccl_infos) != SUCCESS) {
GELOGE(PARAM_INVALID, "HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid.");
return PARAM_INVALID;
}
GELOGI("Set hccl task input output address, node[%s}, type[%s] kernel_hccl_infos.size[%zu].", GELOGI("Set hccl task input output address, node[%s}, type[%s] kernel_hccl_infos.size[%zu].",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_hccl_infos.size()); op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_hccl_infos.size());
if (op_desc->GetType() == HVDWAIT) { if (op_desc->GetType() == HVDWAIT) {
return SUCCESS; return SUCCESS;
} }
domi::Status dmrt;
hcclRedOp_t op_type = HCCL_REP_OP_SUM; hcclRedOp_t op_type = HCCL_REP_OP_SUM;
GE_CHECK_NOTNULL(davinci_model_); GE_CHECK_NOTNULL(davinci_model_);
GELOGI("Calc opType[%s] input address before. Node name[%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str()); GELOGI("Calc opType[%s] input address before. Node name[%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (!davinci_model_->IsKnownNode()) {
input_data_addrs_ = ModelUtils::GetInputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc);
output_data_addrs_ = ModelUtils::GetOutputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc);
}
void *input_data_addr = nullptr;
void *output_data_addr = nullptr;
auto input_data_addr_list = ModelUtils::GetInputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc);

auto output_data_addr_list = ModelUtils::GetOutputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc);
// initialize every kernel_hccl_info inputDataAddr // initialize every kernel_hccl_info inputDataAddr
for (size_t i = 0; i < kernel_hccl_infos.size(); i++) { for (size_t i = 0; i < kernel_hccl_infos.size(); i++) {
std::string hccl_type = kernel_hccl_infos[i].hccl_type; std::string hccl_type = kernel_hccl_infos[i].hccl_type;
if (davinci_model_->IsKnownNode()) {
input_data_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(args_) + i);
output_data_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(args_) + op_desc->GetInputsSize() + i);
GELOGI("Hccl task info known input addr %p, output addr %p.", input_data_addr, output_data_addr);
} else {
input_data_addr = input_data_addrs_.empty() ? nullptr : input_data_addrs_[i];
output_data_addr = output_data_addrs_.empty() ? nullptr : output_data_addrs_[i];
}
void *input_data_addr = input_data_addr_list.empty() ? nullptr : input_data_addr_list[i];
kernel_hccl_infos[i].inputDataAddr = input_data_addr; kernel_hccl_infos[i].inputDataAddr = input_data_addr;

void *output_data_addr = output_data_addr_list.empty() ? nullptr : output_data_addr_list[i];
if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER) { if (hccl_type == HCOMALLGATHER || hccl_type == HCOMRECEIVE || hccl_type == HVDCALLBACKALLGATHER) {
kernel_hccl_infos[i].outputDataAddr = output_data_addr; kernel_hccl_infos[i].outputDataAddr = output_data_addr;
} else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE) { } else if (hccl_type == HCOMALLREDUCE || hccl_type == HCOMREDUCESCATTER || hccl_type == HVDCALLBACKALLREDUCE) {
GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclOperationType(op_desc, op_type),
"davinci_model: GetHcomOperationType fail!");
dmrt = HcomOmeUtil::GetHcclOperationType(op_desc, op_type);
if (dmrt != SUCCESS) {
GELOGE(FAILED, "davinci_model: GetHcomOperationType fail! domi error: %u", dmrt);
return FAILED;
}
kernel_hccl_infos[i].outputDataAddr = output_data_addr; kernel_hccl_infos[i].outputDataAddr = output_data_addr;
kernel_hccl_infos[i].opType = op_type; kernel_hccl_infos[i].opType = op_type;
} }
@@ -361,7 +310,6 @@ void HcclTaskInfo::CreateKernelHcclInfo(const ge::ConstOpDescPtr &op_desc) {
Status HcclTaskInfo::SetWorkspace(const std::shared_ptr<OpDesc> &op_desc, Status HcclTaskInfo::SetWorkspace(const std::shared_ptr<OpDesc> &op_desc,
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) { std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) {
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
GE_CHECK_NOTNULL(davinci_model_);
GELOGI("SetWorkspace Node[%s] opType[%s] set workspace.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); GELOGI("SetWorkspace Node[%s] opType[%s] set workspace.", op_desc->GetName().c_str(), op_desc->GetType().c_str());
uint64_t workspace_mem_size = 0; uint64_t workspace_mem_size = 0;
void *workspace_addr = nullptr; void *workspace_addr = nullptr;
@@ -371,12 +319,11 @@ Status HcclTaskInfo::SetWorkspace(const std::shared_ptr<OpDesc> &op_desc,
GELOGI("hccl need workSpaceMemSize=%lu", workspace_mem_size_tmp); GELOGI("hccl need workSpaceMemSize=%lu", workspace_mem_size_tmp);
if (workspace_mem_size_tmp != 0) { if (workspace_mem_size_tmp != 0) {
workspace_mem_size = workspace_mem_size_tmp; workspace_mem_size = workspace_mem_size_tmp;
if (davinci_model_->IsKnownNode()) {
workspace_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(args_) + op_desc->GetInputsSize() +
op_desc->GetOutputsSize());
} else {
workspace_data_addrs_ = ModelUtils::GetWorkspaceDataAddrs(davinci_model_->GetRuntimeParam(), op_desc);
workspace_addr = workspace_data_addrs_.empty() ? nullptr : workspace_data_addrs_[0];
vector<void *> workspace_data_addrs =
ModelUtils::GetWorkspaceDataAddrs(davinci_model_->GetRuntimeParam(), op_desc);
if (!workspace_data_addrs.empty()) {
GELOGI("Get workSpaceAddr");
workspace_addr = workspace_data_addrs[0];
} }
} }
} }


+ 1
- 14
src/ge/graph/load/new_model_manager/task_info/hccl_task_info.h View File

@@ -34,10 +34,7 @@ class HcclTaskInfo : public TaskInfo {
hccl_stream_list_(), hccl_stream_list_(),
ops_kernel_store_(nullptr), ops_kernel_store_(nullptr),
private_def_(nullptr), private_def_(nullptr),
private_def_len_(0),
op_desc_(nullptr),
args_(nullptr),
args_offset_(0) {}
private_def_len_(0) {}


~HcclTaskInfo() override; ~HcclTaskInfo() override;


@@ -47,10 +44,6 @@ class HcclTaskInfo : public TaskInfo {


uint32_t GetTaskID() override { return id_; } uint32_t GetTaskID() override { return id_; }


Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;

Status UpdateArgs() override;

private: private:
ge::Status SetAddrs(const std::string &hccl_type, const std::shared_ptr<OpDesc> &op); ge::Status SetAddrs(const std::string &hccl_type, const std::shared_ptr<OpDesc> &op);


@@ -79,12 +72,6 @@ class HcclTaskInfo : public TaskInfo {
static std::mutex hccl_follow_stream_mutex_; static std::mutex hccl_follow_stream_mutex_;
static uint32_t max_node_of_hccl_stream_; static uint32_t max_node_of_hccl_stream_;
vector<GETaskKernelHcclInfo> kernel_hccl_infos_; vector<GETaskKernelHcclInfo> kernel_hccl_infos_;
vector<void *> input_data_addrs_;
vector<void *> output_data_addrs_;
vector<void *> workspace_data_addrs_;
OpDescPtr op_desc_;
void *args_;
uint32_t args_offset_;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_HCCL_TASK_INFO_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_HCCL_TASK_INFO_H_

+ 20
- 54
src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -79,9 +79,6 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
return FAILED;) return FAILED;)
} }


GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), ext_info_addr_);

// 2.1 get loop cond variable for tensor array write // 2.1 get loop cond variable for tensor array write
uint64_t step_id_addr = 0; uint64_t step_id_addr = 0;
OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP); OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP);
@@ -100,11 +97,6 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS, GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS,
GELOGE(FAILED, "CreateAicpuKernel error."); GELOGE(FAILED, "CreateAicpuKernel error.");
return FAILED;) return FAILED;)
// 2.3 Create session
GE_CHECK_NOTNULL(ModelManager::GetInstance());
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS,
GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id);
return FAILED;)


kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL); kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL);
if (davinci_model_->IsKnownNode()) { if (davinci_model_->IsKnownNode()) {
@@ -161,8 +153,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
return FAILED;) return FAILED;)


if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG; dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = input_output_addr_; dump_args_ = input_output_addr_;
} }
@@ -175,7 +167,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size();
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_); fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_);


// 4. Return result
// 4. Create session
GE_CHECK_NOTNULL(ModelManager::GetInstance());
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS,
GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id);
return FAILED;)
// 5. Return result
rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;)


@@ -183,7 +180,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;)


davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0);
vector<void *> virtual_io_addrs; // use virtual address for zero copy key.
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false);
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false);
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_in_addrs.begin(), virtual_in_addrs.end());
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_out_addrs.begin(), virtual_out_addrs.end());
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0);


GELOGI("KernelExTaskInfo Init Success. session id: %lu", session_id); GELOGI("KernelExTaskInfo Init Success. session id: %lu", session_id);
return SUCCESS; return SUCCESS;
@@ -205,55 +207,19 @@ Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciMod
uint32_t mem_size = sizeof(uint64_t) * mem_length; uint32_t mem_size = sizeof(uint64_t) * mem_length;
davinci_model->SetTotalArgsSize(mem_size); davinci_model->SetTotalArgsSize(mem_size);
GELOGI("kernel task name %s, args_size %u, args_offset %u", op_desc->GetName().c_str(), mem_size, args_offset_); GELOGI("kernel task name %s, args_size %u, args_offset %u", op_desc->GetName().c_str(), mem_size, args_offset_);

// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > outputs_size) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", outputs_size, output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
}
return SUCCESS; return SUCCESS;
} }


Status KernelExTaskInfo::UpdateArgs() { Status KernelExTaskInfo::UpdateArgs() {
GELOGI("KernelExTaskInfo::UpdateArgs in."); GELOGI("KernelExTaskInfo::UpdateArgs in.");
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> io_addrs;
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
vector<void *> io_addrs;
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
}
}

io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());

GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); "update known node %s zero copy addr failed.", op_desc_->GetName().c_str());


@@ -265,7 +231,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const
const OpDescPtr &op_desc) { const OpDescPtr &op_desc) {
// Userspace copy need virtual address. // Userspace copy need virtual address.
const vector<int64_t> workspace_data_sizes = ModelUtils::GetWorkspaceSize(op_desc); const vector<int64_t> workspace_data_sizes = ModelUtils::GetWorkspaceSize(op_desc);
const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc);
const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc, false);
if (workspace_data_addrs.empty() || workspace_data_sizes.empty()) { if (workspace_data_addrs.empty() || workspace_data_sizes.empty()) {
GELOGE(FAILED, "Node:%s invalid workspace, addrs is %zu, size is %zu.", op_desc->GetName().c_str(), GELOGE(FAILED, "Node:%s invalid workspace, addrs is %zu, size is %zu.", op_desc->GetName().c_str(),
workspace_data_addrs.size(), workspace_data_sizes.size()); workspace_data_addrs.size(), workspace_data_sizes.size());


+ 0
- 2
src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h View File

@@ -54,7 +54,6 @@ class KernelExTaskInfo : public TaskInfo {
auto ret = reinterpret_cast<uintptr_t>(dump_args_); auto ret = reinterpret_cast<uintptr_t>(dump_args_);
return ret; return ret;
} }
bool CallSaveDumpInfo() override { return true; };


private: private:
Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc);
@@ -70,7 +69,6 @@ class KernelExTaskInfo : public TaskInfo {
void *dump_args_; void *dump_args_;
OpDescPtr op_desc_ = nullptr; OpDescPtr op_desc_ = nullptr;
uint32_t args_offset_ = 0; uint32_t args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_

+ 34
- 103
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -47,16 +47,16 @@ const uint32_t kAddrLen = sizeof(void *);


namespace ge { namespace ge {
KernelTaskInfo::SuperKernelTaskInfo KernelTaskInfo::skt_info_ = { KernelTaskInfo::SuperKernelTaskInfo KernelTaskInfo::skt_info_ = {
0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr};
0, 0, 0, 0, nullptr, nullptr, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr};


Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
if (davinci_model == nullptr) { if (davinci_model == nullptr) {
GELOGE(PARAM_INVALID, "davinci model is null!");
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID; return PARAM_INVALID;
} }
davinci_model_ = davinci_model; davinci_model_ = davinci_model;
is_l1_fusion_enable_ = davinci_model_->GetL1FusionEnableOption(); is_l1_fusion_enable_ = davinci_model_->GetL1FusionEnableOption();
GELOGD("KernelTaskInfo init start, ge.enableL1Fusion in davinci model is %d.", is_l1_fusion_enable_);
GELOGD("KernelTaskInfo Init Start, ge.enableL1Fusion in davinci model is %d.", is_l1_fusion_enable_);


Status ret = SetStream(task_def.stream_id(), davinci_model_->GetStreamList()); Status ret = SetStream(task_def.stream_id(), davinci_model_->GetStreamList());
if (ret != SUCCESS) { if (ret != SUCCESS) {
@@ -73,7 +73,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
// get opdesc // get opdesc
op_desc_ = davinci_model_->GetOpByIndex(context.op_index()); op_desc_ = davinci_model_->GetOpByIndex(context.op_index());
if (op_desc_ == nullptr) { if (op_desc_ == nullptr) {
GELOGE(INTERNAL_ERROR, "Get op desc failed, index is out of range!");
GELOGE(INTERNAL_ERROR, "Get op_desc failed, index is out of range!");
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }
(void)AttrUtils::GetBool(*op_desc_, ATTR_N_BATCH_SPILT, is_n_batch_spilt_); (void)AttrUtils::GetBool(*op_desc_, ATTR_N_BATCH_SPILT, is_n_batch_spilt_);
@@ -138,21 +138,14 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
ret = InitCceTask(kernel_def); ret = InitCceTask(kernel_def);
} }


GELOGD("KernelTaskInfo init finish, result=%u.", ret);
GELOGD("KernelTaskInfo Init finish, result=%u.", ret);
return ret; return ret;
} }


Status KernelTaskInfo::SaveSKTDumpInfo() { Status KernelTaskInfo::SaveSKTDumpInfo() {
GE_CHECK_NOTNULL(davinci_model_); GE_CHECK_NOTNULL(davinci_model_);
if (skt_dump_flag_ == RT_KERNEL_DEFAULT) {
GELOGD("no need save skt dump info");
return SUCCESS;
}
// all op in super kernel share one taskid and streamid
for (size_t i = 0; i < skt_info_.op_desc_list.size(); i++) {
davinci_model_->SaveDumpTask(skt_info_.last_task_id, skt_info_.last_stream_id, skt_info_.op_desc_list[i],
skt_info_.dump_args_list[i]);
}
davinci_model_->SaveDumpTask(skt_info_.last_task_id, skt_info_.last_stream_id, skt_info_.last_op,
skt_info_.last_dump_args);
return SUCCESS; return SUCCESS;
} }


@@ -194,9 +187,6 @@ Status KernelTaskInfo::SKTFinalize() {
GELOGI("SuperKernel Distribute [skt_id:%u]", skt_id_); GELOGI("SuperKernel Distribute [skt_id:%u]", skt_id_);
skt_info_.kernel_list.clear(); skt_info_.kernel_list.clear();
skt_info_.arg_list.clear(); skt_info_.arg_list.clear();
skt_info_.dump_flag_list.clear();
skt_info_.op_desc_list.clear();
skt_info_.dump_args_list.clear();
skt_info_.last_stream = nullptr; skt_info_.last_stream = nullptr;
skt_info_.last_block_dim = 0; skt_info_.last_block_dim = 0;
skt_info_.last_sm_desc = sm_desc_; skt_info_.last_sm_desc = sm_desc_;
@@ -207,15 +197,6 @@ Status KernelTaskInfo::SKTFinalize() {
return SUCCESS; return SUCCESS;
} }


uint32_t KernelTaskInfo::GetDumpFlag() {
for (auto flag : skt_info_.dump_flag_list) {
if (flag == RT_KERNEL_DUMPFLAG) {
return RT_KERNEL_DUMPFLAG;
}
}
return RT_KERNEL_DEFAULT;
}

Status KernelTaskInfo::SuperKernelLaunch() { Status KernelTaskInfo::SuperKernelLaunch() {
if (skt_info_.kernel_list.empty()) { if (skt_info_.kernel_list.empty()) {
GELOGI("SuperKernelLaunch: Skt_kernel_list has no task, just return"); GELOGI("SuperKernelLaunch: Skt_kernel_list has no task, just return");
@@ -225,7 +206,7 @@ Status KernelTaskInfo::SuperKernelLaunch() {
auto &skt_kernel_list = skt_info_.kernel_list; auto &skt_kernel_list = skt_info_.kernel_list;
auto &skt_arg_list = skt_info_.arg_list; auto &skt_arg_list = skt_info_.arg_list;
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%d] skt_arg_list[%d]", skt_kernel_list.size(), skt_arg_list.size()); GELOGI("SuperKernelLaunch: Skt_kernel_list size[%d] skt_arg_list[%d]", skt_kernel_list.size(), skt_arg_list.size());
if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) {
if (skt_kernel_list.size() == kSKTSingleSize) {
rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim), rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim),
skt_info_.arg_list[0], skt_info_.last_args_size, skt_info_.arg_list[0], skt_info_.last_args_size,
static_cast<rtSmDesc_t *>(skt_info_.last_sm_desc), skt_info_.last_stream, static_cast<rtSmDesc_t *>(skt_info_.last_sm_desc), skt_info_.last_stream,
@@ -234,7 +215,6 @@ Status KernelTaskInfo::SuperKernelLaunch() {
GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret); GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED; return RT_FAILED;
} }
call_save_dump_ = true;
GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed");
return SUCCESS; return SUCCESS;
} }
@@ -246,22 +226,18 @@ Status KernelTaskInfo::SuperKernelLaunch() {
return RT_FAILED; return RT_FAILED;
} }
// Call the fuse API // Call the fuse API
std::unique_ptr<skt::SuperKernel> superKernel = nullptr;
skt::SuperKernel *superKernel = nullptr;
if (factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel) != SUCCESS) { if (factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel) != SUCCESS) {
GELOGE(RT_FAILED, "SuperKernelLaunch: fuse call failed"); GELOGE(RT_FAILED, "SuperKernelLaunch: fuse call failed");
return RT_FAILED; return RT_FAILED;
} }
// Launch a super kernel // Launch a super kernel
skt_dump_flag_ = GetDumpFlag();
if (superKernel->Launch(skt_info_.last_stream, skt_dump_flag_) != SUCCESS) {
if (superKernel->Launch(skt_info_.last_stream, RT_KERNEL_DUMPFLAG) != SUCCESS) {
GELOGE(RT_FAILED, "SuperKernelLaunch: launch failed"); GELOGE(RT_FAILED, "SuperKernelLaunch: launch failed");
return RT_FAILED; return RT_FAILED;
} }
GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(), GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(),
skt_arg_list.size()); skt_arg_list.size());
// record skt addr for release
superkernel_dev_nav_table_ = superKernel->GetNavTablePtr();
superkernel_device_args_addr_ = superKernel->GetDeviceArgsPtr();
GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed");
return SUCCESS; return SUCCESS;
} }
@@ -274,9 +250,6 @@ Status KernelTaskInfo::SaveSuperKernelInfo() {
skt_info_.last_args_size = args_size_; skt_info_.last_args_size = args_size_;
skt_info_.last_sm_desc = sm_desc_; skt_info_.last_sm_desc = sm_desc_;
skt_info_.last_dump_flag = dump_flag_; skt_info_.last_dump_flag = dump_flag_;
skt_info_.dump_flag_list.push_back(dump_flag_);
skt_info_.op_desc_list.push_back(op_desc_);
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(dump_args_));
skt_info_.last_group_key = group_key_; skt_info_.last_group_key = group_key_;
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(dump_args_); skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(dump_args_);
skt_info_.last_op = op_desc_; skt_info_.last_op = op_desc_;
@@ -355,7 +328,6 @@ Status KernelTaskInfo::SuperKernelDistribute() {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return FAILED; return FAILED;
} }
call_save_dump_ = true;
UpdateTaskId(); UpdateTaskId();
GELOGI("Current Common Task Distribute [taskid:%u]", task_id_); GELOGI("Current Common Task Distribute [taskid:%u]", task_id_);
} else { } else {
@@ -384,7 +356,6 @@ Status KernelTaskInfo::Distribute() {
rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()), rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()),
reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_, reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_,
nullptr, stream_, dump_flag_); nullptr, stream_, dump_flag_);
call_save_dump_ = true;
} else { } else {
/* default: not skt launch */ /* default: not skt launch */
GELOGI( GELOGI(
@@ -398,7 +369,6 @@ Status KernelTaskInfo::Distribute() {
// call rtKernelLaunch for current task // call rtKernelLaunch for current task
rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, static_cast<rtSmDesc_t *>(sm_desc_), rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, static_cast<rtSmDesc_t *>(sm_desc_),
stream_, dump_flag_); stream_, dump_flag_);
call_save_dump_ = true;
} }
} }
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
@@ -422,31 +392,9 @@ Status KernelTaskInfo::UpdateArgs() {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);


vector<void *> io_addrs; vector<void *> io_addrs;
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());


GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); "update known node %s zero copy addr failed.", op_desc_->GetName().c_str());
@@ -460,8 +408,6 @@ Status KernelTaskInfo::Release() {
return SUCCESS; return SUCCESS;
} }
FreeRtMem(&args_); FreeRtMem(&args_);
FreeRtMem(&superkernel_device_args_addr_);
FreeRtMem(&superkernel_dev_nav_table_);
FreeRtMem(&flowtable_); FreeRtMem(&flowtable_);
FreeRtMem(&custom_info_.input_descs); FreeRtMem(&custom_info_.input_descs);
FreeRtMem(&custom_info_.input_addrs); FreeRtMem(&custom_info_.input_addrs);
@@ -526,29 +472,6 @@ Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel
args_offset_ = davinci_model->GetTotalArgsSize(); args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size); davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);

// get opcontext stored in model
const domi::KernelContext &context = kernel_def.context();
// get opdesc
op_desc_ = davinci_model->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > op_desc_->GetOutputsSize()) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc_->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
}
return SUCCESS; return SUCCESS;
} }


@@ -626,8 +549,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
return FAILED; return FAILED;
} }


if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG; dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = static_cast<char *>(args_) + offset; dump_args_ = static_cast<char *>(args_) + offset;
} }
@@ -638,8 +561,10 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
} }


vector<void *> virtual_io_addrs; // use virtual address for zero copy key. vector<void *> virtual_io_addrs; // use virtual address for zero copy key.
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false);
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false);
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_in_addrs.begin(), virtual_in_addrs.end());
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_out_addrs.begin(), virtual_out_addrs.end());
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset);


GELOGD("Do InitTVMTask end"); GELOGD("Do InitTVMTask end");
@@ -677,6 +602,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel
const std::vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); const std::vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
Status ret = StoreInputOutputTensor(input_data_addrs, output_data_addrs, ModelUtils::GetInputDescs(op_desc), Status ret = StoreInputOutputTensor(input_data_addrs, output_data_addrs, ModelUtils::GetInputDescs(op_desc),
ModelUtils::GetOutputDescs(op_desc)); ModelUtils::GetOutputDescs(op_desc));

if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "StoreInputOutputTensor Failed"); GELOGE(ret, "StoreInputOutputTensor Failed");
return ret; return ret;
@@ -741,9 +667,11 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel
return RT_FAILED; return RT_FAILED;
} }


davinci_model_->SetZeroCopyAddr(op_desc, input_data_addrs, input_data_addrs.data(), custom_info_.input_addrs,
input_data_addrs.size() * kAddrLen, 0);
davinci_model_->SetZeroCopyAddr(op_desc, output_data_addrs, output_data_addrs.data(), custom_info_.output_addrs,
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false);
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false);
davinci_model_->SetZeroCopyAddr(op_desc, virtual_in_addrs, input_data_addrs.data(), custom_info_.input_addrs,
virtual_in_addrs.size() * kAddrLen, 0);
davinci_model_->SetZeroCopyAddr(op_desc, virtual_out_addrs, output_data_addrs.data(), custom_info_.output_addrs,
output_data_addrs.size() * kAddrLen, 0); output_data_addrs.size() * kAddrLen, 0);
return SUCCESS; return SUCCESS;
} }
@@ -873,9 +801,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret; return init_ret;
} }
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size()); aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size());


@@ -894,13 +819,19 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return RT_FAILED; return RT_FAILED;
} }


if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG; dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
} }


davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, args_addr.get(), args_, args_size_, sizeof(aicpu::AicpuParamHead));
vector<void *> virtual_io_addrs; // use virtual address for zero copy key.
const vector<void *> virtual_in_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc, false);
const vector<void *> virtual_out_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc, false);
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_in_addrs.begin(), virtual_in_addrs.end());
virtual_io_addrs.insert(virtual_io_addrs.end(), virtual_out_addrs.begin(), virtual_out_addrs.end());
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_,
sizeof(aicpu::AicpuParamHead));


return SUCCESS; return SUCCESS;
} }


+ 0
- 13
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -61,8 +61,6 @@ class KernelTaskInfo : public TaskInfo {
sm_desc_ = nullptr; sm_desc_ = nullptr;
flowtable_ = nullptr; flowtable_ = nullptr;
args_ = nullptr; args_ = nullptr;
superkernel_device_args_addr_ = nullptr;
superkernel_dev_nav_table_ = nullptr;
} }


Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
@@ -90,8 +88,6 @@ class KernelTaskInfo : public TaskInfo {


uint32_t GetSktTaskID() override { return skt_id_; } uint32_t GetSktTaskID() override { return skt_id_; }


bool CallSaveDumpInfo() override { return call_save_dump_; };

cce::ccOpContext ctx_; cce::ccOpContext ctx_;
FusionOpInfo fusion_op_info_; FusionOpInfo fusion_op_info_;


@@ -134,7 +130,6 @@ class KernelTaskInfo : public TaskInfo {
void UpdateSKTTaskId(); void UpdateSKTTaskId();
Status SKTFinalize(); Status SKTFinalize();
Status SuperKernelLaunch(); Status SuperKernelLaunch();
uint32_t GetDumpFlag();
Status SaveSuperKernelInfo(); Status SaveSuperKernelInfo();
bool IsMarkedLastNode(); bool IsMarkedLastNode();
bool IsMarkedFirstNode(); bool IsMarkedFirstNode();
@@ -158,8 +153,6 @@ class KernelTaskInfo : public TaskInfo {
OpDescPtr op_desc_; OpDescPtr op_desc_;
DavinciModel *davinci_model_; DavinciModel *davinci_model_;
uint32_t args_offset_ = 0; uint32_t args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
bool call_save_dump_ = false;


// aicpu ext_info device mem // aicpu ext_info device mem
void *aicpu_ext_info_addr_ = nullptr; void *aicpu_ext_info_addr_ = nullptr;
@@ -171,9 +164,6 @@ class KernelTaskInfo : public TaskInfo {
bool is_n_batch_spilt_; bool is_n_batch_spilt_;
int64_t group_key_; int64_t group_key_;
bool has_group_key_; bool has_group_key_;
uint32_t skt_dump_flag_ = RT_KERNEL_DEFAULT;
void *superkernel_device_args_addr_ = nullptr;
void *superkernel_dev_nav_table_ = nullptr;


struct AICPUCustomInfo { struct AICPUCustomInfo {
void *input_descs = nullptr; void *input_descs = nullptr;
@@ -193,9 +183,6 @@ class KernelTaskInfo : public TaskInfo {
void *last_sm_desc; void *last_sm_desc;
std::vector<void *> kernel_list; std::vector<void *> kernel_list;
std::vector<void *> arg_list; std::vector<void *> arg_list;
std::vector<uint32_t> dump_flag_list;
std::vector<OpDescPtr> op_desc_list;
std::vector<uintptr_t> dump_args_list;
uint32_t last_dump_flag; uint32_t last_dump_flag;
int64_t last_group_key; int64_t last_group_key;
uintptr_t last_dump_args; uintptr_t last_dump_args;


+ 2
- 31
src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc View File

@@ -16,8 +16,8 @@


#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h" #include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h"


#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/davinci_model.h" #include "graph/load/new_model_manager/davinci_model.h"
#include "graph/debug/ge_attr_define.h"


namespace ge { namespace ge {
constexpr uint8_t kLabelSwitchIndexNum = 1; constexpr uint8_t kLabelSwitchIndexNum = 1;
@@ -59,13 +59,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
op_desc->GetName().c_str(), input_data_addr.size(), kLabelSwitchIndexNum); op_desc->GetName().c_str(), input_data_addr.size(), kLabelSwitchIndexNum);
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }

if (davinci_model->IsKnownNode()) {
index_value_ = davinci_model->GetCurrentFixedAddr(fixed_addr_offset_);
} else {
index_value_ = input_data_addr[0];
}

index_value_ = input_data_addr[0];
davinci_model->DisableZeroCopy(index_value_); davinci_model->DisableZeroCopy(index_value_);


std::vector<uint32_t> label_idx_list; std::vector<uint32_t> label_idx_list;
@@ -130,28 +124,5 @@ Status LabelSwitchByIndexTaskInfo::Distribute() {
return SUCCESS; return SUCCESS;
} }


Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GE_CHECK_NOTNULL(davinci_model);
auto label_switch = task_def.label_switch_by_index();
uint32_t op_index = label_switch.op_index();
GELOGI("Begin to calculate args, op_index is: %u", op_index);
auto op_desc = davinci_model->GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op_desc);
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (op_desc->GetInputsSize() != kLabelSwitchIndexNum) {
GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
return FAILED;
}
string input_tensor_name = op_desc->GetInputNameByIndex(0);
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name);
auto tensor_desc = op_desc->GetInputDesc(0);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size);
GELOGI("Calculate stream switchn task args , tensor_size %ld, fixed_addr_offset %ld", tensor_size,
fixed_addr_offset_);
return SUCCESS;
}

REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, LabelSwitchByIndexTaskInfo); REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, LabelSwitchByIndexTaskInfo);
} // namespace ge } // namespace ge

+ 2
- 5
src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h View File

@@ -22,8 +22,7 @@
namespace ge { namespace ge {
class LabelSwitchByIndexTaskInfo : public TaskInfo { class LabelSwitchByIndexTaskInfo : public TaskInfo {
public: public:
LabelSwitchByIndexTaskInfo()
: index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0), fixed_addr_offset_(0) {}
LabelSwitchByIndexTaskInfo() : index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0) {}


~LabelSwitchByIndexTaskInfo() override; ~LabelSwitchByIndexTaskInfo() override;


@@ -31,15 +30,13 @@ class LabelSwitchByIndexTaskInfo : public TaskInfo {


Status Distribute() override; Status Distribute() override;


Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;

private: private:
void *index_value_; // switch index input. void *index_value_; // switch index input.
uint32_t branch_max_; // max branch count. uint32_t branch_max_; // max branch count.
void *args_; // label info memory. void *args_; // label info memory.
uint32_t args_size_; // label info length. uint32_t args_size_; // label info length.

std::vector<rtLabel_t> label_list_; std::vector<rtLabel_t> label_list_;
int64_t fixed_addr_offset_;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_

+ 70
- 16
src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc View File

@@ -21,9 +21,9 @@


namespace ge { namespace ge {
Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("MemcpyAddrAsyncTaskInfo Init Start");
GELOGI("MemcpyAddrAsyncTaskInfo Init Start.");
if (davinci_model == nullptr) { if (davinci_model == nullptr) {
GELOGE(PARAM_INVALID, "davinci_model is null");
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID; return PARAM_INVALID;
} }


@@ -32,27 +32,45 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel
return ret; return ret;
} }


const auto &memcpy_async = task_def.memcpy_async();
OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index());
auto memcpy_async_def = task_def.memcpy_async();
uint32_t op_index = memcpy_async_def.op_index();
OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index);
if (op_desc == nullptr) { if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index());
GELOGE(INTERNAL_ERROR, "Init MemcpyAddrAsyncTaskInfo error, index is out of range!");
return INTERNAL_ERROR; return INTERNAL_ERROR;
} }


ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_);
uint64_t logic_dst = memcpy_async_def.dst();
uint64_t logic_src = memcpy_async_def.src();

dst_max_ = memcpy_async_def.dst_max();

uint64_t update_base_addr = 0;
ret = GetUpdateBaseAddr(davinci_model, logic_src, update_base_addr);
if (ret != SUCCESS) { if (ret != SUCCESS) {
return ret; return ret;
} }
src_ = reinterpret_cast<uint8_t *>(update_base_addr + logic_src);
if (src_ == nullptr) {
GELOGE(PARAM_INVALID, "src_ is null!");
return PARAM_INVALID;
}


ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_);
if (ret != SUCCESS) {
return ret;
uint64_t mem_base = reinterpret_cast<uint64_t>(davinci_model->MemBase());
uint64_t logic_mem_base = davinci_model->GetRtBaseAddr();
dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_base + (logic_dst - logic_mem_base)));
if (dst_ == nullptr) {
GELOGE(PARAM_INVALID, "dst_ is null!");
return PARAM_INVALID;
} }


vector<void *> io_addrs; vector<void *> io_addrs;
io_addrs.emplace_back(src_); io_addrs.emplace_back(src_);
io_addrs.emplace_back(dst_); io_addrs.emplace_back(dst_);


count_ = memcpy_async_def.count();
kind_ = memcpy_async_def.kind();

// malloc args memory // malloc args memory
size_t args_size = sizeof(void *) * io_addrs.size(); size_t args_size = sizeof(void *) * io_addrs.size();
rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM);
@@ -70,18 +88,20 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel
return RT_FAILED; return RT_FAILED;
} }


count_ = memcpy_async.count();
kind_ = memcpy_async.kind();
dst_max_ = memcpy_async.dst_max();
GELOGI("InitMemcpyAddrAsyncTaskInfo, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu, args:%p, size:%zu",
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_, args_size);
// Just dest addr need zero copy.
davinci_model->SetZeroCopyAddr(op_desc, {dst_}, io_addrs.data(), args_, args_size, sizeof(void *));

GELOGI("InitMemcpyAddrAsyncTaskInfo, logic_src:%p, logic_dst:%p, src:%p, dst:%p, src_args:%p, dst_args:%p",
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_src)),
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_dst)), src_, dst_, args_,
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + args_size));


davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_, args_size, 0);
return SUCCESS; return SUCCESS;
} }


Status MemcpyAddrAsyncTaskInfo::Distribute() { Status MemcpyAddrAsyncTaskInfo::Distribute() {
GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start, dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_);
GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start.");
GELOGI("Distribute MemcpyAddrAsync, dst_max:%lu, count:%lu, kind:%u.", dst_max_, count_, kind_);


rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + sizeof(void *)), rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + sizeof(void *)),
dst_max_, args_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); dst_max_, args_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_);
@@ -93,5 +113,39 @@ Status MemcpyAddrAsyncTaskInfo::Distribute() {
return SUCCESS; return SUCCESS;
} }


Status MemcpyAddrAsyncTaskInfo::GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr,
uint64_t &base_addr) {
GE_CHECK_NOTNULL(davinci_model);
uint64_t data_base_addr =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->MemBase())) - davinci_model->GetRtBaseAddr();
uint64_t weight_base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->WeightsMemBase())) -
davinci_model->GetRtWeightAddr();
uint64_t var_base_addr =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->VarMemBase())) - davinci_model->GetRtVarAddr();

uint64_t data_base_addr_start = davinci_model->GetRtBaseAddr();
uint64_t data_base_addr_end = davinci_model->GetRtBaseAddr() + davinci_model->TotalMemSize();
uint64_t wight_base_addr_start = davinci_model->GetRtWeightAddr();
uint64_t wight_base_addr_end = davinci_model->GetRtWeightAddr() + davinci_model->TotalWeightsMemSize();
uint64_t varible_base_addr_start = davinci_model->GetRtVarAddr();
uint64_t varible_base_addr_end = davinci_model->GetRtVarAddr() + davinci_model->TotalVarMemSize();

if ((data_base_addr_start <= update_addr) && (update_addr <= data_base_addr_end)) {
base_addr = data_base_addr;
GELOGI("The update_addr is data address.");
} else if ((wight_base_addr_start <= update_addr) && (update_addr <= wight_base_addr_end)) {
base_addr = weight_base_addr;
GELOGI("The update_addr is weight address.");
} else if ((varible_base_addr_start <= update_addr) && (update_addr <= varible_base_addr_end)) {
base_addr = var_base_addr;
GELOGI("The update_addr is variable address.");
} else if (update_addr != 0) {
base_addr = 0;
GELOGE(PARAM_INVALID, "The update_addr is abnormal.");
return PARAM_INVALID;
}
return SUCCESS;
}

REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ADDR_ASYNC, MemcpyAddrAsyncTaskInfo); REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ADDR_ASYNC, MemcpyAddrAsyncTaskInfo);
} // namespace ge } // namespace ge

+ 6
- 4
src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h View File

@@ -16,7 +16,6 @@


#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h" #include "graph/load/new_model_manager/task_info/task_info.h"


namespace ge { namespace ge {
@@ -33,8 +32,9 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo {
if (ret != RT_ERROR_NONE) { if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
} }
args_ = nullptr;
} }

args_ = nullptr;
} }


Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
@@ -42,9 +42,11 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo {
Status Distribute() override; Status Distribute() override;


private: private:
uint8_t *dst_;
Status GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr, uint64_t &base_addr);

void *dst_;
uint64_t dst_max_; uint64_t dst_max_;
uint8_t *src_;
void *src_;
void *args_; void *args_;
uint64_t count_; uint64_t count_;
uint32_t kind_; uint32_t kind_;


+ 51
- 55
src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc View File

@@ -21,9 +21,9 @@


namespace ge { namespace ge {
Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("MemcpyAsyncTaskInfo Init Start");
GELOGI("MemcpyAsyncTaskInfo Init Start.");
if (davinci_model == nullptr) { if (davinci_model == nullptr) {
GELOGE(PARAM_INVALID, "davinci_model is null");
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID; return PARAM_INVALID;
} }


@@ -32,38 +32,35 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
return ret; return ret;
} }


memcpy_async = task_def.memcpy_async();
count_ = memcpy_async.count();
kind_ = memcpy_async.kind();
dst_max_ = memcpy_async.dst_max();
if (davinci_model->IsKnownNode()) {
src_ = reinterpret_cast<uint8_t *>(davinci_model_->GetCurrentArgsAddr(args_offset_));
dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *));
// for zero copy
kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE;
GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_);
return SUCCESS;
}
ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_);
if (ret != SUCCESS) {
return ret;
}
auto memcpy_async_def = task_def.memcpy_async();
uint64_t logic_dst = memcpy_async_def.dst();
uint64_t logic_src = memcpy_async_def.src();

dst_max_ = memcpy_async_def.dst_max();


ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_);
uint64_t update_base_addr = 0;
ret = GetUpdateBaseAddr(davinci_model, logic_src, update_base_addr);
if (ret != SUCCESS) { if (ret != SUCCESS) {
return ret; return ret;
} }
src_ = reinterpret_cast<uint8_t *>(update_base_addr + logic_src);
davinci_model->DisableZeroCopy(src_);


GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu",
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_);
uint64_t mem_base = reinterpret_cast<uint64_t>(davinci_model->MemBase());
uint64_t logic_mem_base = davinci_model->GetRtBaseAddr();
dst_ = reinterpret_cast<uint8_t *>(mem_base + (logic_dst - logic_mem_base));

count_ = memcpy_async_def.count();
kind_ = memcpy_async_def.kind();
GELOGI("MemcpyAsyncTaskInfo Init Success, logic_src:%p, logic_dst:%p, src:%p, dst:%p",
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_src)),
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_dst)), src_, dst_);


davinci_model->DisableZeroCopy(src_);
davinci_model->DisableZeroCopy(dst_);
return SUCCESS; return SUCCESS;
} }


Status MemcpyAsyncTaskInfo::Distribute() { Status MemcpyAsyncTaskInfo::Distribute() {
GELOGI("MemcpyAsyncTaskInfo Distribute Start. dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_);
GELOGI("MemcpyAsyncTaskInfo Distribute Start. dst_max:%lu, count:%lu, kind:%u.", dst_max_, count_, kind_);


rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
@@ -71,41 +68,40 @@ Status MemcpyAsyncTaskInfo::Distribute() {
return RT_FAILED; return RT_FAILED;
} }


GELOGI("MemcpyAsyncTaskInfo Distribute Success");
return SUCCESS;
}

Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
// the num of src and dst size is 2
uint32_t args_size = sizeof(void *) * 2;
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
davinci_model_ = davinci_model;
GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_);
GELOGI("MemcpyAsyncTaskInfo Distribute Success.");
return SUCCESS; return SUCCESS;
} }


Status MemcpyAsyncTaskInfo::UpdateArgs() {
GELOGI("MemcpyAsyncTaskInfo::UpdateArgs in.");
GE_CHECK_NOTNULL(davinci_model_);
Status ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.src(), src_);
if (ret != SUCCESS) {
return ret;
}

ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.dst(), dst_);
if (ret != SUCCESS) {
return ret;
Status MemcpyAsyncTaskInfo::GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr, uint64_t &base_addr) {
GE_CHECK_NOTNULL(davinci_model);
uint64_t data_base_addr =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->MemBase())) - davinci_model->GetRtBaseAddr();
uint64_t weight_base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->WeightsMemBase())) -
davinci_model->GetRtWeightAddr();
uint64_t var_base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(davinci_model->VarMemBase())) -
davinci_model->GetRtVarAddr();

uint64_t data_base_addr_start = davinci_model->GetRtBaseAddr();
uint64_t data_base_addr_end = davinci_model->GetRtBaseAddr() + davinci_model->TotalMemSize();
uint64_t wight_base_addr_start = davinci_model->GetRtWeightAddr();
uint64_t wight_base_addr_end = davinci_model->GetRtWeightAddr() + davinci_model->TotalWeightsMemSize();
uint64_t varible_base_addr_start = davinci_model->GetRtVarAddr();
uint64_t varible_base_addr_end = davinci_model->GetRtVarAddr() + davinci_model->TotalVarMemSize();

if ((data_base_addr_start <= update_addr) && (update_addr <= data_base_addr_end)) {
base_addr = data_base_addr;
GELOGI("The update_addr is data address.");
} else if ((wight_base_addr_start <= update_addr) && (update_addr <= wight_base_addr_end)) {
base_addr = weight_base_addr;
GELOGI("The update_addr is weight address.");
} else if ((varible_base_addr_start <= update_addr) && (update_addr <= varible_base_addr_end)) {
base_addr = var_base_addr;
GELOGI("The update_addr is variable address.");
} else if (update_addr != 0) {
base_addr = 0;
GELOGE(PARAM_INVALID, "The update_addr is abnormal.");
return PARAM_INVALID;
} }

vector<void *> io_addrs;
io_addrs.emplace_back(reinterpret_cast<void *>(src_));
io_addrs.emplace_back(reinterpret_cast<void *>(dst_));

GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update memcpyasync in known node zero copy addr failed.");

GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success.");
return SUCCESS; return SUCCESS;
} }




+ 4
- 10
src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h View File

@@ -16,7 +16,6 @@


#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_

#include "graph/load/new_model_manager/task_info/task_info.h" #include "graph/load/new_model_manager/task_info/task_info.h"


namespace ge { namespace ge {
@@ -33,19 +32,14 @@ class MemcpyAsyncTaskInfo : public TaskInfo {


Status Distribute() override; Status Distribute() override;


Status UpdateArgs() override;

Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;

private: private:
uint8_t *dst_;
Status GetUpdateBaseAddr(DavinciModel *davinci_model, uint64_t update_addr, uint64_t &base_addr);

void *dst_;
uint64_t dst_max_; uint64_t dst_max_;
uint8_t *src_;
void *src_;
uint64_t count_; uint64_t count_;
uint32_t kind_; uint32_t kind_;
DavinciModel *davinci_model_ = nullptr;
uint32_t args_offset_ = 0;
domi::MemcpyAsyncDef memcpy_async;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_

+ 6
- 37
src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc View File

@@ -42,11 +42,16 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d


auto stream_switch_def = task_def.stream_switch(); auto stream_switch_def = task_def.stream_switch();
uint32_t op_index = stream_switch_def.op_index(); uint32_t op_index = stream_switch_def.op_index();

// get StreamSwitch op // get StreamSwitch op
OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index); OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc);
SetInputAndValuePtr(davinci_model, input_data_addr);
if (!input_data_addr.empty() && input_data_addr.size() >= STREAM_SWITCH_INPUT_NUM) {
input_ptr_ = input_data_addr[0];
value_ptr_ = input_data_addr[1];
}

uint32_t cond = 0; uint32_t cond = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_STREAM_SWITCH_COND, cond)) { if (!AttrUtils::GetInt(op_desc, ATTR_NAME_STREAM_SWITCH_COND, cond)) {
GELOGE(INTERNAL_ERROR, "StreamSwitchOp get attr STREAM_SWITCH_COND fail."); GELOGE(INTERNAL_ERROR, "StreamSwitchOp get attr STREAM_SWITCH_COND fail.");
@@ -110,42 +115,6 @@ Status StreamSwitchTaskInfo::Distribute() {
GELOGI("StreamSwitchTaskInfo Distribute Success. cond:%d, stream:%p, datatype:%d.", cond_, true_stream_, data_type_); GELOGI("StreamSwitchTaskInfo Distribute Success. cond:%d, stream:%p, datatype:%d.", cond_, true_stream_, data_type_);
return SUCCESS; return SUCCESS;
} }
Status StreamSwitchTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GE_CHECK_NOTNULL(davinci_model);
auto stream_switch_def = task_def.stream_switch();
uint32_t op_index = stream_switch_def.op_index();
GELOGI("Begin to calculate args, op_index is: %u", op_index);
auto op_desc = davinci_model->GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op_desc);
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (op_desc->GetInputsSize() != STREAM_SWITCH_INPUT_NUM) {
GELOGE(FAILED, "Stream switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
return FAILED;
}
for (uint32_t i = 0; i < STREAM_SWITCH_INPUT_NUM; ++i) {
string input_tensor_name = op_desc->GetInputNameByIndex(i);
int64_t fixed_addr_offset = davinci_model->GetFixedAddrsSize(input_tensor_name);
fixed_addr_offset_.emplace_back(fixed_addr_offset);
auto tensor_desc = op_desc->GetInputDesc(i);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr[%u] offset %ld", tensor_size, i,
fixed_addr_offset);
}
return SUCCESS;
}


// Resolves the two device addresses the stream-switch task reads at runtime:
// input_ptr_ (the value being tested) and value_ptr_ (the comparison operand).
// For known nodes with offsets pre-computed in CalculateArgs, the addresses come
// from the model's fixed-address area; otherwise they come from the per-op
// runtime input addresses. If neither source is usable, the pointers are left
// unchanged (same best-effort behavior as before).
// @param davinci_model     owning model (assumed non-null, as at the call site)
// @param input_data_addrs  runtime input addresses gathered for this op
void StreamSwitchTaskInfo::SetInputAndValuePtr(DavinciModel *davinci_model, const vector<void *> &input_data_addrs) {
  if (davinci_model->IsKnownNode() && fixed_addr_offset_.size() == STREAM_SWITCH_INPUT_NUM) {
    input_ptr_ = davinci_model->GetCurrentFixedAddr(fixed_addr_offset_[0]);
    value_ptr_ = davinci_model->GetCurrentFixedAddr(fixed_addr_offset_[1]);
  } else if (input_data_addrs.size() >= STREAM_SWITCH_INPUT_NUM) {
    // size() >= STREAM_SWITCH_INPUT_NUM already implies non-empty, so the
    // previous separate empty() check was redundant and has been dropped.
    input_ptr_ = input_data_addrs[0];
    value_ptr_ = input_data_addrs[1];
  }
}
REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH, StreamSwitchTaskInfo); REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH, StreamSwitchTaskInfo);
} // namespace ge } // namespace ge

+ 0
- 5
src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h View File

@@ -39,18 +39,13 @@ class StreamSwitchTaskInfo : public TaskInfo {


Status Distribute() override; Status Distribute() override;


Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;

private: private:
void SetInputAndValuePtr(DavinciModel *davinci_model, const vector<void *> &input_data_addrs);
void *input_ptr_; void *input_ptr_;
rtCondition_t cond_; rtCondition_t cond_;
void *value_ptr_; void *value_ptr_;
rtStream_t true_stream_; rtStream_t true_stream_;
uint32_t true_stream_id_; uint32_t true_stream_id_;
rtSwitchDataType_t data_type_; rtSwitchDataType_t data_type_;
static const uint32_t kInputNum = 2;
vector<int64_t> fixed_addr_offset_;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_

+ 13
- 33
src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc View File

@@ -24,15 +24,18 @@
namespace { namespace {
const uint32_t kDynamicBtachParamNum = 1; const uint32_t kDynamicBtachParamNum = 1;
const uint32_t kDynamicResolutionParamNum = 2; const uint32_t kDynamicResolutionParamNum = 2;
const uint8_t kStreamSwitchnInputNum = 1;
} // namespace } // namespace


namespace ge { namespace ge {
Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("StreamSwitchNTaskInfo Init Start."); GELOGI("StreamSwitchNTaskInfo Init Start.");
GE_CHECK_NOTNULL(davinci_model);
if (davinci_model == nullptr) {
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID;
}


if (SetStream(task_def.stream_id(), davinci_model->GetStreamList()) != SUCCESS) {
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList());
if (ret != SUCCESS) {
return FAILED; return FAILED;
} }


@@ -72,16 +75,14 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *
GELOGE(FAILED, "Get true stream ptr of switchN op failed."); GELOGE(FAILED, "Get true stream ptr of switchN op failed.");
return FAILED; return FAILED;
} }
if (davinci_model->IsKnownNode()) {
input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_);
} else {
auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc);
if (input_data_addr.empty()) {
GELOGE(FAILED, "Input data addr is nullptr.");
return FAILED;
}
input_ptr_ = input_data_addr[0];

// set input_ptr_
auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc);
if (input_data_addr.empty()) {
GELOGE(FAILED, "Input data addr is nullptr.");
return FAILED;
} }
input_ptr_ = input_data_addr[0];
davinci_model->DisableZeroCopy(input_ptr_); davinci_model->DisableZeroCopy(input_ptr_);
GELOGI("StreamSwitchNTaskInfo Init Success, inputSize:%u, elementSize:%d, trueStreamID:%ld.", input_size_, GELOGI("StreamSwitchNTaskInfo Init Success, inputSize:%u, elementSize:%d, trueStreamID:%ld.", input_size_,
element_size_, op_desc->GetStreamId()); element_size_, op_desc->GetStreamId());
@@ -139,26 +140,5 @@ Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, Davinci
return SUCCESS; return SUCCESS;
} }


Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GE_CHECK_NOTNULL(davinci_model);
auto stream_switchn_def = task_def.stream_switch_n();
uint32_t op_index = stream_switchn_def.op_index();
GELOGI("Begin to calculate args, op_index is: %u", op_index);
auto op_desc = davinci_model->GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op_desc);
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (op_desc->GetInputsSize() != kStreamSwitchnInputNum) {
GELOGE(FAILED, "Stream switchn op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
return FAILED;
}
string input_tensor_name = op_desc->GetInputNameByIndex(0);
args_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name);
auto tensor_desc = op_desc->GetInputDesc(0);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size);
GELOGI("Calculate stream switchn task args , tensor_size %ld, args_offset %ld", tensor_size, args_offset_);
return SUCCESS;
}
REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo); REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo);
} // namespace ge } // namespace ge

+ 1
- 5
src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h View File

@@ -29,8 +29,7 @@ class StreamSwitchNTaskInfo : public TaskInfo {
value_ptr_(nullptr), value_ptr_(nullptr),
true_stream_ptr_(nullptr), true_stream_ptr_(nullptr),
element_size_(0), element_size_(0),
data_type_(RT_SWITCH_INT64),
args_offset_(0) {}
data_type_(RT_SWITCH_INT64) {}


~StreamSwitchNTaskInfo() override {} ~StreamSwitchNTaskInfo() override {}


@@ -38,8 +37,6 @@ class StreamSwitchNTaskInfo : public TaskInfo {


Status Distribute() override; Status Distribute() override;


Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;

private: private:
Status GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model); Status GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model);
void *input_ptr_; void *input_ptr_;
@@ -50,7 +47,6 @@ class StreamSwitchNTaskInfo : public TaskInfo {
rtSwitchDataType_t data_type_; rtSwitchDataType_t data_type_;
vector<rtStream_t> true_stream_list_; vector<rtStream_t> true_stream_list_;
vector<int64_t> value_list_; vector<int64_t> value_list_;
int64_t args_offset_;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_

+ 12
- 3
src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h View File

@@ -34,13 +34,22 @@ class SuperKernel {
public: public:
SuperKernel(const void *stub, void *ptr, uint64_t sz, uint32_t dim) SuperKernel(const void *stub, void *ptr, uint64_t sz, uint32_t dim)
: func_stub_(stub), dev_nav_table_(ptr), nav_table_size_(sz), block_dim_(dim) {} : func_stub_(stub), dev_nav_table_(ptr), nav_table_size_(sz), block_dim_(dim) {}
~SuperKernel() = default;
~SuperKernel() {
// free memory when all releasing
if (device_args_addr_ != nullptr) {
GE_CHK_RT(rtFree(device_args_addr_));
GELOGI("SKT: super_kernel args addr free.");
}
if (dev_nav_table_ != nullptr) {
GE_CHK_RT(rtFree(dev_nav_table_));
GELOGI("SKT: super_kernel args addr free.");
}
}
Status Launch(rtStream_t stream, uint32_t dump_flag); Status Launch(rtStream_t stream, uint32_t dump_flag);
const void *GetFuncStub() const { return func_stub_; } const void *GetFuncStub() const { return func_stub_; }
const void *GetNavTablePtr() const { return dev_nav_table_; }
uint64_t GetNavTableSize() const { return nav_table_size_; } uint64_t GetNavTableSize() const { return nav_table_size_; }
uint32_t GetBlockDim() const { return block_dim_; } uint32_t GetBlockDim() const { return block_dim_; }
void *GetNavTablePtr() const { return dev_nav_table_; }
void *GetDeviceArgsPtr() const { return device_args_addr_; }
}; };
} // namespace skt } // namespace skt
} // namespace ge } // namespace ge


+ 79
- 27
src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc View File

@@ -42,10 +42,21 @@ Status SuperKernelFactory::Init() {
rt_ret = rtGetAddrByFun(this->func_stub_, &this->func_ptr_); rt_ret = rtGetAddrByFun(this->func_stub_, &this->func_ptr_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret);
return FAILED;) return FAILED;)
GELOGD(
"SKT: fuseKernels super_kernel_template subFunc %p, device func "
"address %p",
this->func_stub_, this->func_ptr_);
if (this->use_physical_address_ != nullptr) {
void *skt_func = nullptr;
rt_ret = rtKernelConfigTransArg(this->func_ptr_, sizeof(uint64_t), 0, &skt_func);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret);
return FAILED;)
GELOGD(
"SKT: fuseKernels super_kernel_template subFunc %p, device func "
"address %p, device physic PC %p",
this->func_stub_, this->func_ptr_, skt_func);
} else {
GELOGD(
"SKT: fuseKernels super_kernel_template subFunc %p, device func "
"address %p",
this->func_stub_, this->func_ptr_);
}
} }
is_init_ = true; is_init_ = true;


@@ -60,8 +71,7 @@ Status SuperKernelFactory::Uninitialize() {
} }


Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list, Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list,
const std::vector<void *> &args_addr_list, uint32_t block_dim,
std::unique_ptr<skt::SuperKernel> &h) {
const std::vector<void *> &args_addr_list, uint32_t block_dim, SuperKernel *&h) {
// Iterate through the ops to be fused // Iterate through the ops to be fused
// Each subkernel to be fused contains 2 fields: fn address offset, args // Each subkernel to be fused contains 2 fields: fn address offset, args
// address. // address.
@@ -91,28 +101,70 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list


rtError_t rt_ret; rtError_t rt_ret;
void *hbm_nav_table_addr = nullptr; void *hbm_nav_table_addr = nullptr;
for (unsigned i = 0; i < stub_func_list.size(); i++) {
void *sub_device_func = nullptr;
rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret);
return FAILED;)
GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func);
// store two uint64_t address
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4;
GELOGD("SKT: CALL offet %lu", nav_table[i * 2]);
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i]));
GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]);
if (this->use_physical_address_ != nullptr) {
for (unsigned i = 0; i < stub_func_list.size(); i++) {
void *sub_device_func = nullptr;
rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret);
return FAILED;)
void *sub_device_func_pys = nullptr;
void *args_addr_pys = nullptr;
rt_ret = rtKernelConfigTransArg(sub_device_func, sizeof(uint64_t), 0, &sub_device_func_pys);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret);
return FAILED;)
rt_ret = rtKernelConfigTransArg(args_addr_list[i], sizeof(uint64_t), 0, &args_addr_pys);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret);
return FAILED;)
GELOGD(
"SKT: fuseKernels subFunc %p, device func address %p, device "
"physic func address %p",
stub_func_list[i], sub_device_func, sub_device_func_pys);
// store two uint64_t address
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func_pys)) / 4;
GELOGD("SKT: CALL offset %lu", nav_table[i * 2]);
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_pys));

GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]);
}

void *hbm_nav_table_addr_pys = nullptr;
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)
rt_ret = rtKernelConfigTransArg(hbm_nav_table_addr, sizeof(uint64_t), 0, &hbm_nav_table_addr_pys);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)

GELOGD("SKT: hbm_nav_table_addr %p, hbm_nav_table_addr_pys %p", hbm_nav_table_addr, hbm_nav_table_addr_pys);
// Create the necessary metadata for the super kernel
h = new SuperKernel(this->func_stub_, hbm_nav_table_addr_pys, nav_table_size, block_dim);
} else {
for (unsigned i = 0; i < stub_func_list.size(); i++) {
void *sub_device_func = nullptr;
rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret);
return FAILED;)
GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func);
// store two uint64_t address
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4;
GELOGD("SKT: CALL offet %lu", nav_table[i * 2]);
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i]));
GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]);
}
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)
// Create the necessary metadata for the super kernel
h = new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim);
} }
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)
// Create the necessary metadata for the super kernel
h =
std::unique_ptr<skt::SuperKernel>(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim));
return SUCCESS; return SUCCESS;
} }
} // namespace skt } // namespace skt


+ 2
- 1
src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h View File

@@ -29,6 +29,7 @@ class SuperKernelFactory {
void *func_ptr_ = nullptr; void *func_ptr_ = nullptr;
void *handle_ = nullptr; void *handle_ = nullptr;
std::string sk_stub_name_ = "_Z21super_kernel_templatePmm"; std::string sk_stub_name_ = "_Z21super_kernel_templatePmm";
const char *use_physical_address_ = getenv("GE_USE_PHYSICAL_ADDRESS");
bool is_init_ = false; bool is_init_ = false;
SuperKernelFactory(){}; SuperKernelFactory(){};
~SuperKernelFactory() { ~SuperKernelFactory() {
@@ -47,7 +48,7 @@ class SuperKernelFactory {
Status Init(); Status Init();
Status Uninitialize(); Status Uninitialize();
Status FuseKernels(const std::vector<void *> &stub_func_list, const std::vector<void *> &args_addr_list, Status FuseKernels(const std::vector<void *> &stub_func_list, const std::vector<void *> &args_addr_list,
uint32_t block_dim, std::unique_ptr<skt::SuperKernel> &h);
uint32_t block_dim, SuperKernel *&h);
}; };
} // namespace skt } // namespace skt
} // namespace ge } // namespace ge


+ 0
- 2
src/ge/graph/load/new_model_manager/task_info/task_info.h View File

@@ -72,8 +72,6 @@ class TaskInfo {


virtual uint32_t GetTaskID() { return 0xFFFFFFFF; } virtual uint32_t GetTaskID() { return 0xFFFFFFFF; }


virtual bool CallSaveDumpInfo() { return false; }

virtual uint32_t GetStreamId() { return 0xFFFFFFFF; } virtual uint32_t GetStreamId() { return 0xFFFFFFFF; }


virtual uintptr_t GetDumpArgs() { return 0; } virtual uintptr_t GetDumpArgs() { return 0; }


+ 1
- 1
src/ge/graph/load/new_model_manager/task_info/task_info_factory.h View File

@@ -86,5 +86,5 @@ class TaskInfoFactory {
return ptr; \ return ptr; \
} \ } \
TaskInfoFactory::Registerar g_##type##_Task_Info_Creator(type, Creator_##type##_Task_Info); TaskInfoFactory::Registerar g_##type##_Task_Info_Creator(type, Creator_##type##_Task_Info);
} // namespace ge
}; // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_FACTORY_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_FACTORY_H_

+ 6
- 0
src/ge/graph/load/new_model_manager/zero_copy_task.cc View File

@@ -129,6 +129,12 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, const DataBuffer &data,
} }


auto dst_addr = static_cast<uint8_t *>(data.data); auto dst_addr = static_cast<uint8_t *>(data.data);
auto dst_size = static_cast<uint64_t>(data.length);
if (ModelUtils::ConvertVirtualAddressToPhysical(dst_addr, dst_size, dst_addr) != SUCCESS) {
GELOGE(FAILED, "[ZCPY] Convert virtual address to physical for dst_addr failed.");
return FAILED;
}

GELOGI("[ZCPY] %s update task, args: %p, size: %zu, offset: %zu, addr: 0x%lx, length: %u", name_.c_str(), GELOGI("[ZCPY] %s update task, args: %p, size: %zu, offset: %zu, addr: 0x%lx, length: %u", name_.c_str(),
args_addr_, args_size_, offset, addr, data.length); args_addr_, args_size_, offset, addr, data.length);
*(uintptr_t *)(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr); *(uintptr_t *)(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr);


+ 175
- 0
src/ge/graph/load/output/output.cc View File

@@ -0,0 +1,175 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/load/output/output.h"

#include <memory.h>

#include "common/properties_manager.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"

namespace ge {
// Binds this Output to its op description and owning model. Address bases and
// per-input sizes/addresses are resolved later in Init().
Output::Output(const OpDescPtr &op_desc, DavinciModel *model)
    : base_(nullptr), var_base_(nullptr), logic_base_(0), logic_var_base_(0),
      model_(model), op_desc_(op_desc), input_num_(0) {}

// Output does not own any of these pointers; just drop the references.
Output::~Output() {
  model_ = nullptr;
  base_ = nullptr;
  var_base_ = nullptr;
}

///
/// @ingroup domi
/// @brief Initialize input/output params
/// @return Status
///
Status Output::Init() {
  if (op_desc_ == nullptr || model_ == nullptr) {
    GELOGE(INTERNAL_ERROR, "The op_desc_ or model_ is nullptr.");
    return INTERNAL_ERROR;
  }

  // Cache the model's physical base pointers and their logical counterparts;
  // per-input offsets below are relative to the logical addresses.
  base_ = model_->MemBase();
  var_base_ = model_->VarMemBase();
  logic_base_ = model_->GetRtBaseAddr();
  logic_var_base_ = model_->GetRtVarAddr();

  input_num_ = op_desc_->GetInputsSize();
  v_input_size_.clear();
  v_input_data_addr_.clear();

  // One offset per input is required; a mismatch means the op was built inconsistently.
  auto input_vector = op_desc_->GetInputOffset();
  if (input_num_ != input_vector.size()) {
    GELOGE(INTERNAL_ERROR, "input desc size: %zu != input offset size: %zu.", input_num_, input_vector.size());
    return INTERNAL_ERROR;
  }

  for (size_t i = 0; i < input_num_; i++) {
    int64_t tensor_size = 0;
    auto input_desc = op_desc_->GetInputDescPtr(i);
    GE_CHECK_NOTNULL(input_desc);
    Status ret = TensorUtils::GetSize(*input_desc, tensor_size);
    if (ret != GRAPH_SUCCESS) {
      GELOGE(ret, "Get size from TensorDesc failed, op : %s, input index : %zu", op_desc_->GetName().c_str(), i);
      return ret;
    }
    v_input_size_.push_back(tensor_size);

    // Variable tensors live in the variable memory region, everything else in
    // the model's feature-map region; translate the logical offset accordingly.
    if (VarManager::Instance(model_->SessionId())->IsVarAddr(input_vector[i])) {
      v_input_data_addr_.push_back(static_cast<uint8_t *>(var_base_ + input_vector[i] - logic_var_base_));
    } else {
      v_input_data_addr_.push_back(static_cast<uint8_t *>(base_ + input_vector[i]));
    }
  }

  GELOGI("Init output:%lu, %lu, %lu", input_num_, v_input_size_.size(), v_input_data_addr_.size());

  return SUCCESS;
}

///
/// @ingroup domi
/// @brief Copy Op Output to user space.
/// @brief when model running, Add one DataOp as input node, Add one Output Op as output node.
/// @return Status
///
// Copies every input tensor of this output op into the caller-provided blobs,
// starting at rslt.blobs[data_begin]. On return data_index is data_begin plus
// the number of blobs consumed.
// @return SUCCESS, or FAILED when there are not enough blobs / a copy fails.
Status Output::CopyResult(OutputData &rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share) {
  uint32_t data_count = 0;
  // Guard first: data_begin past the end would make the unsigned subtraction
  // below wrap around, silently defeating the size checks and indexing out of bounds.
  if (data_begin > rslt.blobs.size()) {
    GELOGE(FAILED, "Data begin %u out of range, data_buf num: %zu.", data_begin, rslt.blobs.size());
    return FAILED;
  }
  size_t remain_num = rslt.blobs.size() - data_begin;
  if (input_num_ > remain_num) {
    GELOGE(FAILED, "Tensor num %zu, data_buf num: %zu.", input_num_, remain_num);
    return FAILED;
  } else if (input_num_ < remain_num) {
    GELOGW("Tensor num %zu, data_buf num: %zu.", input_num_, remain_num);
  }

  for (size_t i = 0; i < input_num_; i++) {
    // SetDataBuf advances data_count on success, so each iteration consumes the next blob.
    DataBuffer data_buf = rslt.blobs[data_begin + data_count];
    Status ret = SetDataBuf(data_buf, data_count, i, support_mem_share);
    if (ret != SUCCESS) {
      GELOGE(ret, "Copy data to host error. index: %zu", i);
      return ret;
    }
    data_index = data_begin + data_count;
  }

  return SUCCESS;
}

// Copies input tensor i from device memory into data_buf (device-to-host),
// or skips the copy when the user buffer supports memory sharing.
// data_count is incremented on every successful (or skipped) blob so the caller
// can track how many blobs were consumed.
Status Output::SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, bool support_mem_share) {
  // A zero-length buffer is treated as "nothing to copy", not an error.
  if (data_buf.length == 0) {
    ++data_count;
    GELOGD("Length of data_buffer is zero, No need to copy. output op : %s, output tensor index : %zu!",
           op_desc_->GetName().c_str(), i);
    return SUCCESS;
  }

  auto tensor_desc = op_desc_->GetInputDescPtr(static_cast<uint32_t>(i));
  if (tensor_desc == nullptr) {
    GELOGE(FAILED, "tensor_desc is null");
    return FAILED;
  }

  if (data_buf.isDataSupportMemShare && support_mem_share) {
    GELOGI("No need to copy input data, user's output data buffer can be shared.");
  } else {
    // Copy result to Databuf
    int64_t size = v_input_size_[i];
    GELOGI("Tensor data size before: %ld", size);

    // Refresh the size from the tensor descriptor; v_input_size_ holds the
    // value captured at Init() time and may differ.
    graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(*tensor_desc, size);
    if (graph_status != ge::GRAPH_SUCCESS) {
      GELOGE(graph_status, "GetTensorSizeInBytes failed!");
      return FAILED;
    }

    // Too-small buffer is fatal; too-large is tolerated with a warning.
    if (data_buf.length < size) {
      GELOGE(FAILED, "Tensor data size: %ld data_buf length: %ld", size, data_buf.length);
      return FAILED;
    } else if (data_buf.length > size) {
      GELOGW("Tensor data size: %ld data_buf length: %ld", size, data_buf.length);
    }

    rtError_t rt_ret = rtMemcpy(data_buf.data, size, v_input_data_addr_[i], size, RT_MEMCPY_DEVICE_TO_HOST);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(rt_ret, "rtmemcpy error");
      return FAILED;
    }
    GELOGI("Tensor data size: %ld data_buf length: %ld", size, data_buf.length);
  }

  ++data_count;
  GELOGD("Successfully copy the output tensor memory to buffer, output op : %s, output tensor index : %zu!",
         op_desc_->GetName().c_str(), i);

  return SUCCESS;
}

// Appends every input's device address and byte size (as resolved by Init())
// to the caller's vectors, in input order.
void Output::GetOutputData(vector<void *> &v_data_addr, vector<int64_t> &v_data_size) {
  for (size_t idx = 0; idx < input_num_; ++idx) {
    v_data_addr.emplace_back(v_input_data_addr_[idx]);
    v_data_size.emplace_back(v_input_size_[idx]);
  }
}
} // namespace ge

+ 94
- 0
src/ge/graph/load/output/output.h View File

@@ -0,0 +1,94 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_LOAD_OUTPUT_OUTPUT_H_
#define GE_GRAPH_LOAD_OUTPUT_OUTPUT_H_

#include <string>
#include <vector>

#include "common/debug/log.h"
#include "common/op/attr_value_util.h"
#include "common/op/ge_op_utils.h"
#include "common/types.h"
#include "common/util.h"
#include "common/ge_types.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/op_desc.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {
using std::string;
using std::vector;

// Wraps one output op of a loaded model: resolves where its input tensors live
// in device memory and copies them out to user-provided buffers.
class Output {
 public:
  Output(const OpDescPtr &op_desc, DavinciModel *model);
  virtual ~Output();

  ///
  /// @ingroup domi
  /// @brief Initialize input/output params
  /// @return Status
  ///
  virtual Status Init();

  ///
  /// @ingroup domi
  /// @brief Copy Op Output to user space.
  /// @brief when model running, Add one DataOp as input node, Add one Output Op as output node.
  /// @return Status
  ///
  virtual Status CopyResult(OutputData &rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share);

  ///
  /// @ingroup domi
  /// @brief Copy one output tensor into data_buf (device-to-host), or skip the
  /// @brief copy when memory sharing is supported; increments data_count on success.
  /// @return Status
  ///
  Status SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, bool support_mem_share);

  ///
  /// @ingroup domi
  /// @brief Get Output data and size.
  /// @return void
  ///
  void GetOutputData(vector<void *> &v_data_addr, vector<int64_t> &v_data_size);

  // Copy assignment operator and copy constructor are deleted
  Output &operator=(const Output &output) = delete;
  Output(const Output &output) = delete;

 protected:
  // Model's base address (non-owning; provided by model_ in Init())
  uint8_t *base_;
  uint8_t *var_base_;
  // Logical counterparts of base_/var_base_ used to translate tensor offsets
  uint64_t logic_base_;
  uint64_t logic_var_base_;
  // The DavinciModel which ops belong to (non-owning)
  DavinciModel *model_;

  ConstOpDescPtr op_desc_;

  // Input descriptions
  size_t input_num_;
  vector<void *> v_input_data_addr_;  // init as:buf_base + op_def_->input(i));
  vector<int64_t> v_input_size_;
};
}  // namespace ge

#endif // GE_GRAPH_LOAD_OUTPUT_OUTPUT_H_

+ 7
- 3
src/ge/graph/manager/graph_caching_allocator.cc View File

@@ -34,6 +34,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize,
26 * kGByteSize}; 26 * kGByteSize};


static bool BlockComparator(const Block *left, const Block *right) { static bool BlockComparator(const Block *left, const Block *right) {
if (left->device_id != right->device_id) {
return left->device_id < right->device_id;
}
if (left->size != right->size) { if (left->size != right->size) {
return left->size < right->size; return left->size < right->size;
} }
@@ -264,20 +267,20 @@ Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) {
return ge::FAILED; return ge::FAILED;
} }
} }
if (AddToBlockBin(memory_addr, memory_size, device_id) != ge::SUCCESS) {
if (AddToBlockBin(memory_addr, memory_size) != ge::SUCCESS) {
(void)memory_allocator_->FreeMemory(memory_addr); (void)memory_allocator_->FreeMemory(memory_addr);
return ge::FAILED; return ge::FAILED;
} }
return ge::SUCCESS; return ge::SUCCESS;
} }


Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size, uint32_t device_id) {
Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size) {
BlockBin *bin = GetBlockBin(size); BlockBin *bin = GetBlockBin(size);
if (bin == nullptr) { if (bin == nullptr) {
GELOGE(ge::FAILED, "Get block bin failed size = %zu", size); GELOGE(ge::FAILED, "Get block bin failed size = %zu", size);
return ge::FAILED; return ge::FAILED;
} }
Block *block = new (std::nothrow) Block(device_id, size, bin, nullptr);
Block *block = new (std::nothrow) Block(0, size, bin, nullptr);
if (block == nullptr) { if (block == nullptr) {
GELOGE(ge::FAILED, "Alloc block failed size = %zu", size); GELOGE(ge::FAILED, "Alloc block failed size = %zu", size);
return ge::FAILED; return ge::FAILED;
@@ -336,4 +339,5 @@ void CachingAllocator::FreeBlockBins() {
} }
} }
} }

} // namespace ge } // namespace ge

+ 5
- 7
src/ge/graph/manager/graph_caching_allocator.h View File

@@ -32,6 +32,7 @@
#include "runtime/mem.h" #include "runtime/mem.h"


namespace ge { namespace ge {

constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes
constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold
constexpr size_t kKByteSize = 1024; constexpr size_t kKByteSize = 1024;
@@ -68,10 +69,6 @@ class CachingAllocator {
public: public:
explicit CachingAllocator(rtMemType_t memory_type); explicit CachingAllocator(rtMemType_t memory_type);


CachingAllocator(const CachingAllocator &) = delete;

CachingAllocator &operator=(const CachingAllocator &) = delete;

virtual ~CachingAllocator() = default; virtual ~CachingAllocator() = default;


/// ///
@@ -140,10 +137,9 @@ class CachingAllocator {
/// @brief add memory to right bin based on size /// @brief add memory to right bin based on size
/// @param [in] memory ptr /// @param [in] memory ptr
/// @param [in] memory size /// @param [in] memory size
/// @param [in] device_id device id
/// @return Status result of function /// @return Status result of function
/// ///
Status AddToBlockBin(uint8_t *ptr, size_t size, uint32_t device_id);
Status AddToBlockBin(uint8_t *ptr, size_t size);


/// ///
/// @ingroup ge_graph /// @ingroup ge_graph
@@ -210,5 +206,7 @@ class CachingAllocator {
// block bins by different block size // block bins by different block size
BlockBin *free_block_bins_[kNumBins]; BlockBin *free_block_bins_[kNumBins];
}; };
} // namespace ge

}; // namespace ge

#endif // GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_ #endif // GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_

+ 36
- 57
src/ge/graph/manager/graph_manager.cc View File

@@ -57,6 +57,7 @@
#include "graph/passes/flow_ctrl_pass.h" #include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/hccl_group_pass.h" #include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identify_reference_pass.h"
#include "graph/passes/identity_pass.h" #include "graph/passes/identity_pass.h"
#include "graph/passes/iterator_op_pass.h" #include "graph/passes/iterator_op_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h" #include "graph/passes/link_gen_mask_nodes_pass.h"
@@ -73,9 +74,7 @@
#include "graph/passes/switch_data_edges_bypass.h" #include "graph/passes/switch_data_edges_bypass.h"
#include "graph/passes/switch_dead_branch_elimination.h" #include "graph/passes/switch_dead_branch_elimination.h"
#include "graph/passes/switch_logic_remove_pass.h" #include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/switch_op_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h" #include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h" #include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h" #include "graph/passes/transop_nearby_allreduce_fusion_pass.h"
@@ -84,7 +83,6 @@
#include "graph/passes/transpose_transdata_pass.h" #include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/variable_op_pass.h" #include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/ref_identity_delete_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h" #include "graph/passes/variable_ref_useless_control_out_delete_pass.h"
#include "graph/utils/tensor_adapter.h" #include "graph/utils/tensor_adapter.h"
@@ -349,13 +347,12 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr
return SUCCESS; return SUCCESS;
} }


#define GM_RUN_AND_DUMP_PERF(name, func, ...) \
#define GM_RUN_AND_DUMP(name, func, ...) \
do { \ do { \
GE_RUN_PERF(GraphManager, func, __VA_ARGS__); \
GE_RUN(GraphManager, func, __VA_ARGS__); \
GE_DUMP(compute_graph, "PreRunAfter" name); \ GE_DUMP(compute_graph, "PreRunAfter" name); \
GELOGI("Run %s on graph %s(%u) success.", name, compute_graph->GetName().c_str(), graph_node->GetGraphId()); \ GELOGI("Run %s on graph %s(%u) success.", name, compute_graph->GetName().c_str(), graph_node->GetGraphId()); \
} while (0) } while (0)

Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model, uint64_t session_id) { GeRootModelPtr &ge_root_model, uint64_t session_id) {
GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(graph_node);
@@ -368,30 +365,30 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
compute_graph->GetName().c_str()); compute_graph->GetName().c_str());
GE_DUMP(compute_graph, "PreRunBegin"); GE_DUMP(compute_graph, "PreRunBegin");


GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", graph_optimize_.OptimizeOriginalGraphForQuantize, compute_graph);
GM_RUN_AND_DUMP_PERF("HandleSummaryOp", graph_optimize_.HandleSummaryOp, compute_graph);
GM_RUN_AND_DUMP_PERF("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph,
session_id);
GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph);
GM_RUN_AND_DUMP("OptimizeGraphPrepare", graph_optimize_.OptimizeOriginalGraphForQuantize, compute_graph);
GM_RUN_AND_DUMP("HandleSummaryOp", graph_optimize_.HandleSummaryOp, compute_graph);
GM_RUN_AND_DUMP("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph,
session_id);
GM_RUN_AND_DUMP("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph);


GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner);
GM_RUN_AND_DUMP_PERF("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph);
GM_RUN_AND_DUMP("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner);
GM_RUN_AND_DUMP("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph);
GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph);
if (IsTailingOptimization()) { if (IsTailingOptimization()) {
GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph);
GM_RUN_AND_DUMP("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph);
} }
GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph);
GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed);
GM_RUN_AND_DUMP("Optimize1", OptimizeStage1, compute_graph);
GM_RUN_AND_DUMP("InferShape2", compute_graph->InferShapeInNeed);
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip != nullptr) { if (unknown_shape_skip != nullptr) {
PassManager graph_pass; PassManager graph_pass;
GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass))
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); GE_CHK_STATUS_RET(graph_pass.Run(compute_graph));
} }
GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed.");
GM_RUN_AND_DUMP_PERF("OptimizeSubgraph", OptimizeSubgraph, graph_node, compute_graph, session_id);
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph);
GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id);
GM_RUN_AND_DUMP("OptimizeSubgraph", OptimizeSubgraph, graph_node, compute_graph, session_id);
GM_RUN_AND_DUMP("Optimize2", OptimizeStage2, compute_graph);
GM_RUN_AND_DUMP("Build", Build, graph_node, compute_graph, ge_root_model, session_id);


// when set incre build, save om model and var manager // when set incre build, save om model and var manager
GeModelPtr ge_model = nullptr; GeModelPtr ge_model = nullptr;
@@ -400,7 +397,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
GELOGW("Fail to save cache."); GELOGW("Fail to save cache.");
} }
// release rts generate context // release rts generate context
RtContextUtil::GetInstance().DestroyRtContexts(session_id);
RtContextUtil::GetInstance().DestroyrtContexts();
GEEVENT("[GEPERFTRACE] GE PreRun End"); GEEVENT("[GEPERFTRACE] GE PreRun End");
return SUCCESS; return SUCCESS;
} }
@@ -474,7 +471,7 @@ Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphN
} }
GE_TIMESTAMP_START(LoadGraph); GE_TIMESTAMP_START(LoadGraph);
Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, model_listener); Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, model_listener);
GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraph");
GE_TIMESTAMP_END(LoadGraph, "GraphManager::LoadGraph");
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "[StartForRunGraph] LoadGraph Failed"); GELOGE(ret, "[StartForRunGraph] LoadGraph Failed");
graph_node->SetRunFlag(false); graph_node->SetRunFlag(false);
@@ -637,7 +634,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector<GeTenso
graph_optimize_.TranFrameOp(compute_graph_tmp); graph_optimize_.TranFrameOp(compute_graph_tmp);
} }


GeRootModelPtr ge_root_model = nullptr;
GeRootModelPtr ge_root_model;
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "[RunGraph] StartForRunGraph failed!"); GELOGE(ret, "[RunGraph] StartForRunGraph failed!");
@@ -1616,6 +1613,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
SwitchDeadBranchElimination switch_dead_branch_elimination; SwitchDeadBranchElimination switch_dead_branch_elimination;
SwitchLogicRemovePass switch_logic_remove_pass; SwitchLogicRemovePass switch_logic_remove_pass;
MergePass merge_pass; MergePass merge_pass;
IdentifyReferencePass identify_reference_pass;
CastRemovePass cast_remove_pass; CastRemovePass cast_remove_pass;
TransposeTransDataPass transpose_transdata_pass; TransposeTransDataPass transpose_transdata_pass;
TransOpSymmetryEliminationPass symmetry_elimination_pass; TransOpSymmetryEliminationPass symmetry_elimination_pass;
@@ -1624,6 +1622,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination); names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination);
names_to_passes.emplace_back("SwitchLogicRemovePass", &switch_logic_remove_pass); names_to_passes.emplace_back("SwitchLogicRemovePass", &switch_logic_remove_pass);
names_to_passes.emplace_back("MergePass", &merge_pass); names_to_passes.emplace_back("MergePass", &merge_pass);
names_to_passes.emplace_back("IdentifyReferencePass", &identify_reference_pass);
names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass);
names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass);
names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass);
@@ -1639,32 +1638,14 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
GELOGE(ret, "Run passes when OptimizeStage1_2 failed, ret:%u.", ret); GELOGE(ret, "Run passes when OptimizeStage1_2 failed, ret:%u.", ret);
return ret; return ret;
} }
// Calculate Op/Fe constantfolding cost
uint64_t op_constant_folding_cost = 0;
for (auto &it : constant_folding_pass.GetOpConstantFoldingPerfStatistic()) {
op_constant_folding_cost += it.second.second;
GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(),
it.second.second, it.second.first);
}
GEEVENT("[GEPERFTRACE] The time cost of extern constant folding is [%lu] micro second.", op_constant_folding_cost);
for (auto &it : constant_folding_pass.GetGeConstantFoldingPerfStatistic()) {
op_constant_folding_cost += it.second.second;
GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(),
it.second.second, it.second.first);
}


GraphUtils::DumpGEGraphToOnnx(*compute_graph, "OptimizeStage1_2"); GraphUtils::DumpGEGraphToOnnx(*compute_graph, "OptimizeStage1_2");
PassManager graph_pass; PassManager graph_pass;
// the prune pass should between SwitchPass and SwitchToStreamSwitchPass
// the prune pass should between SwtichPass and SwitchOpPass
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass))
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::NextIterationPass", new (std::nothrow) NextIterationPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::NextIterationPass", new (std::nothrow) NextIterationPass))
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ControlTriggerPass", new (std::nothrow) ControlTriggerPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ControlTriggerPass", new (std::nothrow) ControlTriggerPass))
GE_CHK_STATUS_RET(
graph_pass.AddPass("OptimizeStage1_3::MergeToStreamMergePass", new (std::nothrow) MergeToStreamMergePass))
GE_CHK_STATUS_RET(
graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass))
GE_CHK_STATUS_RET(
graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass))
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::SwitchOpPass", new (std::nothrow) SwitchOpPass))
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass))
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass",
new (std::nothrow) VariableRefUselessControlOutDeletePass)) new (std::nothrow) VariableRefUselessControlOutDeletePass))
@@ -1679,7 +1660,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {


NamesToPass identity_remove_pass; NamesToPass identity_remove_pass;
GE_TIMESTAMP_START(identity_remove_pass); GE_TIMESTAMP_START(identity_remove_pass);
IdentityPass identity_force_pass(true); // after SwitchToStreamSwitchPass
IdentityPass identity_force_pass(true); // after SwitchOpPass
identity_remove_pass.emplace_back("IdentityPass", &identity_force_pass); identity_remove_pass.emplace_back("IdentityPass", &identity_force_pass);
ret = GEPass(compute_graph).Run(identity_remove_pass); ret = GEPass(compute_graph).Run(identity_remove_pass);
GE_TIMESTAMP_END(identity_remove_pass, "GraphPrepare::IdentityRemovePass"); GE_TIMESTAMP_END(identity_remove_pass, "GraphPrepare::IdentityRemovePass");
@@ -1739,8 +1720,6 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {


GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass",
new (std::nothrow) MultiBatchPass)) new (std::nothrow) MultiBatchPass))
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::RefIdentityDeleteOpPass",
new (std::nothrow) RefIdentityDeleteOpPass))
// the value of the attr is the original variable name the ref-variable ref from. // the value of the attr is the original variable name the ref-variable ref from.
// The attr will be used when allocating memory, // The attr will be used when allocating memory,
// the node marked attr will be output to a variable instead of new-allocated memory. // the node marked attr will be output to a variable instead of new-allocated memory.
@@ -1798,6 +1777,8 @@ Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_gra


GEPass ge_passes_for_shape(compute_graph); GEPass ge_passes_for_shape(compute_graph);
NamesToPass names_to_passes_for_shape; NamesToPass names_to_passes_for_shape;
IdentifyReferencePass identify_reference_pass;
names_to_passes_for_shape.emplace_back("IdentifyReferencePass", &identify_reference_pass);
CastRemovePass cast_remove_pass; CastRemovePass cast_remove_pass;
names_to_passes_for_shape.emplace_back("CastRemovePass", &cast_remove_pass); names_to_passes_for_shape.emplace_back("CastRemovePass", &cast_remove_pass);
TransposeTransDataPass transpose_transdata_pass; TransposeTransDataPass transpose_transdata_pass;
@@ -1885,10 +1866,7 @@ Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_gra
GE_CHK_STATUS_RET(ret, "Remove isolated Constant failed, ret:%d.", ret); GE_CHK_STATUS_RET(ret, "Remove isolated Constant failed, ret:%d.", ret);


PassManager pass_for_optimize; PassManager pass_for_optimize;
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip == nullptr) {
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("SubgraphPass", new (std::nothrow) SubgraphPass));
}
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("SubgraphPass", new (std::nothrow) SubgraphPass));
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("MultiBatchPass", new (std::nothrow) MultiBatchPass)); GE_CHK_STATUS_RET(pass_for_optimize.AddPass("MultiBatchPass", new (std::nothrow) MultiBatchPass));
GE_CHK_STATUS_RET(pass_for_optimize.AddPass("CompileNodesPass", new (std::nothrow) CompileNodesPass)); GE_CHK_STATUS_RET(pass_for_optimize.AddPass("CompileNodesPass", new (std::nothrow) CompileNodesPass));
GE_TIMESTAMP_START(pass_for_optimize); GE_TIMESTAMP_START(pass_for_optimize);
@@ -1928,7 +1906,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G
GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_); GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_);
Status ret = Status ret =
GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_);
GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync");
GE_TIMESTAMP_END(LoadGraph, "GraphManager::LoadGraphAsync");
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed"); GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed");
graph_node->SetRunFlag(false); graph_node->SetRunFlag(false);
@@ -2331,21 +2309,21 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
GELOGE(FAILED, "failed get dynamic shape partitioned flag on partitioned graph."); GELOGE(FAILED, "failed get dynamic shape partitioned flag on partitioned graph.");
return FAILED; return FAILED;
} }
GE_TIMESTAMP_EVENT_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape");
GE_TIMESTAMP_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape");
GE_TIMESTAMP_START(GraphPartition); GE_TIMESTAMP_START(GraphPartition);
ret = graph_partitioner_.Partition(compute_graph, GraphPartitioner::kPartitioning); ret = graph_partitioner_.Partition(compute_graph, GraphPartitioner::kPartitioning);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Graph partition Failed"); GELOGE(ret, "Graph partition Failed");
return ret; return ret;
} }
GE_TIMESTAMP_EVENT_END(GraphPartition, "OptimizeSubgraph::Partition1");
GE_TIMESTAMP_END(GraphPartition, "OptimizeSubgraph::Partition1");
GE_TIMESTAMP_START(SetSubgraph); GE_TIMESTAMP_START(SetSubgraph);
ret = SetSubgraph(session_id, compute_graph); ret = SetSubgraph(session_id, compute_graph);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ret, "Graph set subgraph Failed"); GELOGE(ret, "Graph set subgraph Failed");
return ret; return ret;
} }
GE_TIMESTAMP_EVENT_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph");
GE_TIMESTAMP_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph");


ComputeGraphPtr merged_compute_graph = nullptr; ComputeGraphPtr merged_compute_graph = nullptr;
std::vector<ComputeGraphPtr> merged_sub_graph_list; std::vector<ComputeGraphPtr> merged_sub_graph_list;
@@ -2364,7 +2342,7 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
sub_graph->SetSessionID(session_id); sub_graph->SetSessionID(session_id);
sub_graph->SetGraphID(graph_node->GetGraphId()); sub_graph->SetGraphID(graph_node->GetGraphId());
} }
GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph");
GE_TIMESTAMP_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph");
GE_DUMP(merged_compute_graph, "mergedComputeGraph"); GE_DUMP(merged_compute_graph, "mergedComputeGraph");
compute_graph = merged_compute_graph; compute_graph = merged_compute_graph;
if (!AttrUtils::SetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { if (!AttrUtils::SetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) {
@@ -2390,7 +2368,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp
} }


bool is_always_dump = false; bool is_always_dump = false;
if (!PropertiesManager::Instance().GetDumpProperties(session_id).GetDumpPath().empty()) {
PropertiesManager &properties_manager = PropertiesManager::Instance();
if (!properties_manager.GetDumpOutputPath().empty()) {
is_always_dump = true; is_always_dump = true;
} }




+ 1
- 1
src/ge/graph/manager/graph_manager.h View File

@@ -327,6 +327,6 @@ class GraphManager {


std::mutex run_mutex_; std::mutex run_mutex_;
}; };
} // namespace ge
}; // namespace ge


#endif // GE_GRAPH_MANAGER_GRAPH_MANAGER_H_ #endif // GE_GRAPH_MANAGER_GRAPH_MANAGER_H_

+ 1
- 1
src/ge/graph/manager/graph_mem_allocator.h View File

@@ -190,6 +190,6 @@ class MemManager {
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_;
std::recursive_mutex allocator_mutex_; std::recursive_mutex allocator_mutex_;
}; };
} // namespace ge
}; // namespace ge


#endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ #endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_

+ 3
- 4
src/ge/graph/manager/graph_var_manager.cc View File

@@ -91,7 +91,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
std::string var_key = VarKey(var_name, tensor_desc); std::string var_key = VarKey(var_name, tensor_desc);
GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str());
if (var_addr_mgr_map_.count(var_key) == 0) { if (var_addr_mgr_map_.count(var_key) == 0) {
uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() +
uint64_t logic_address = VarManager::Instance(0)->GetVarMemLogicBase() +
reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(),
TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(),
@@ -274,7 +274,7 @@ MemResource::MemResource() : total_size_(0), var_mem_size_(0) {}
Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) { Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) {
size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize;
uint64_t real_size = size; uint64_t real_size = size;
total_size_ = VarManager::Instance(session_id)->GetVarMemMaxSize();
total_size_ = VarManager::Instance(0)->GetVarMemMaxSize();
if (total_size_ < var_mem_size_) { if (total_size_ < var_mem_size_) {
GELOGE(PARAM_INVALID, "total_size_: %lu is smaller than var_mem_size_: %lu", total_size_, var_mem_size_); GELOGE(PARAM_INVALID, "total_size_: %lu is smaller than var_mem_size_: %lu", total_size_, var_mem_size_);
return PARAM_INVALID; return PARAM_INVALID;
@@ -684,8 +684,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty
if (mem_base == nullptr) { if (mem_base == nullptr) {
return nullptr; return nullptr;
} }
uint8_t *mem_addr =
logic_addr + reinterpret_cast<intptr_t>(mem_base) - VarManager::Instance(session_id_)->GetVarMemLogicBase();
uint8_t *mem_addr = logic_addr + reinterpret_cast<intptr_t>(mem_base) - VarManager::Instance(0)->GetVarMemLogicBase();
return mem_addr; return mem_addr;
} }




+ 1
- 1
src/ge/graph/manager/graph_var_manager.h View File

@@ -309,5 +309,5 @@ class VarManagerPool {
std::mutex var_manager_mutex_; std::mutex var_manager_mutex_;
map<uint64_t, VarManager *> var_manager_map_; map<uint64_t, VarManager *> var_manager_map_;
}; };
} // namespace ge
}; // namespace ge
#endif // GE_GRAPH_MANAGER_GRAPH_VAR_MANAGER_H_ #endif // GE_GRAPH_MANAGER_GRAPH_VAR_MANAGER_H_

+ 2
- 2
src/ge/graph/manager/model_manager/event_manager.h View File

@@ -92,6 +92,6 @@ class EventManager {
std::vector<rtEvent_t> event_list_; std::vector<rtEvent_t> event_list_;
bool inited_; bool inited_;
uint32_t current_idx_; uint32_t current_idx_;
}; // EventManager
} // namespace ge
}; // EventManager
}; // namespace ge
#endif // GE_GRAPH_MANAGER_MODEL_MANAGER_EVENT_MANAGER_H_ #endif // GE_GRAPH_MANAGER_MODEL_MANAGER_EVENT_MANAGER_H_

+ 8
- 10
src/ge/graph/manager/trans_var_data_utils.cc View File

@@ -397,11 +397,10 @@ Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeT


uint8_t *src_addr = nullptr; uint8_t *src_addr = nullptr;
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr));
uint8_t *mem_addr =
src_addr -
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) +
static_cast<int64_t>(
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM)));
uint8_t *mem_addr = src_addr -
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(0)->GetVarMemLogicBase())) +
static_cast<int64_t>(
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM)));
GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size)); GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size));


GE_CHK_RT_RET(rtMemcpy(*host_addr, src_tensor_size, mem_addr, src_tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_RT_RET(rtMemcpy(*host_addr, src_tensor_size, mem_addr, src_tensor_size, RT_MEMCPY_DEVICE_TO_HOST));
@@ -414,11 +413,10 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8
const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) { const ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) {
uint8_t *dst_addr = nullptr; uint8_t *dst_addr = nullptr;
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr));
uint8_t *mem_addr =
dst_addr -
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) +
static_cast<int64_t>(
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM)));
uint8_t *mem_addr = dst_addr -
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(0)->GetVarMemLogicBase())) +
static_cast<int64_t>(
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM)));
GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE));


GELOGI("SyncTensorToDevice var_name %s, addr_size %u", var_name.c_str(), addr_size); GELOGI("SyncTensorToDevice var_name %s, addr_size %u", var_name.c_str(), addr_size);


+ 1
- 8
src/ge/graph/manager/util/hcom_util.cc View File

@@ -24,6 +24,7 @@
#include "graph/utils/type_utils.h" #include "graph/utils/type_utils.h"


namespace ge { namespace ge {

Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc, Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc,
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) { std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) {
GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(op_desc);
@@ -100,12 +101,6 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, hcclDataType
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(i)); GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(i));
GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetInputDescPtr(i), input_size), GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetInputDescPtr(i), input_size),
"get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); "get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i);
// dynamic shape hccl op get size from output tensor desc
if (op_desc->HasAttr(ATTR_NAME_IS_UNKNOWN_SHAPE)) {
GE_CHECK_NOTNULL(op_desc->GetOutputDescPtr(i));
GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetOutputDescPtr(i), input_size),
"get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i);
}


GE_IF_BOOL_EXEC( GE_IF_BOOL_EXEC(
op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0; op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0;
@@ -119,8 +114,6 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, hcclDataType
total_size = total_size + block_size; continue;); total_size = total_size + block_size; continue;);


int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize();
GELOGD("hcom util node %s inputsize %ld, shapesize %ld, datasize %d.", op_desc->GetName().c_str(), input_size,
shape_size, size);
GE_CHK_STATUS_RET(ge::CheckInt64Int32MulOverflow(shape_size, size), GE_CHK_STATUS_RET(ge::CheckInt64Int32MulOverflow(shape_size, size),
"Product of shape size and size beyond INT64_MAX"); "Product of shape size and size beyond INT64_MAX");
GE_IF_BOOL_EXEC(is_allgather, block_size = shape_size * size;); GE_IF_BOOL_EXEC(is_allgather, block_size = shape_size * size;);


+ 2
- 2
src/ge/graph/manager/util/hcom_util.h View File

@@ -144,6 +144,8 @@ class HcomOmeUtil {
/// ///
static Status GetHorovodInputs(const ge::ConstOpDescPtr &op_desc, static Status GetHorovodInputs(const ge::ConstOpDescPtr &op_desc,
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos); std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos);

private:
/// ///
/// @ingroup domi_ome /// @ingroup domi_ome
/// @brief GetHcomCount /// @brief GetHcomCount
@@ -152,8 +154,6 @@ class HcomOmeUtil {
/// ///
static Status GetHcomCount(const ge::ConstOpDescPtr &op_desc, hcclDataType_t data_type, bool is_allgather, static Status GetHcomCount(const ge::ConstOpDescPtr &op_desc, hcclDataType_t data_type, bool is_allgather,
int &count); int &count);

private:
/// ///
/// @ingroup domi_ome /// @ingroup domi_ome
/// @brief GetHorovodCount /// @brief GetHorovodCount


+ 5
- 22
src/ge/graph/manager/util/rt_context_util.cc View File

@@ -19,30 +19,13 @@
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"


namespace ge { namespace ge {
void RtContextUtil::AddRtContext(uint64_t session_id, rtContext_t context) {
std::lock_guard<std::mutex> lock(ctx_mutex_);
rt_contexts_[session_id].emplace_back(context);
}

void RtContextUtil::DestroyRtContexts(uint64_t session_id) {
std::lock_guard<std::mutex> lock(ctx_mutex_);
auto &contexts = rt_contexts_[session_id];
DestroyRtContexts(session_id, contexts);
}

void RtContextUtil::DestroyAllRtContexts() {
std::lock_guard<std::mutex> lock(ctx_mutex_);
for (auto &ctx_pair : rt_contexts_) {
DestroyRtContexts(ctx_pair.first, ctx_pair.second);
}
rt_contexts_.clear();
}
void RtContextUtil::AddrtContext(rtContext_t context) { rtContexts_.emplace_back(context); }


void RtContextUtil::DestroyRtContexts(uint64_t session_id, std::vector<rtContext_t> &contexts) {
GELOGI("Runtime context handle number of session %lu is %zu.", session_id, contexts.size());
for (auto &rtContext : contexts) {
void RtContextUtil::DestroyrtContexts() {
GELOGI("The size of runtime context handle is %zu.", rtContexts_.size());
for (auto &rtContext : rtContexts_) {
(void)rtCtxDestroy(rtContext); (void)rtCtxDestroy(rtContext);
} }
contexts.clear();
rtContexts_.clear();
} }
} // namespace ge } // namespace ge

+ 3
- 10
src/ge/graph/manager/util/rt_context_util.h View File

@@ -18,8 +18,6 @@
#define GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_ #define GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_


#include <vector> #include <vector>
#include <map>
#include <mutex>


#include "runtime/context.h" #include "runtime/context.h"


@@ -31,14 +29,13 @@ class RtContextUtil {
return instance; return instance;
} }


void AddRtContext(uint64_t session_id, rtContext_t context);
void AddrtContext(rtContext_t context);


const rtContext_t GetNormalModeContext() const { return before_prerun_ctx_; } const rtContext_t GetNormalModeContext() const { return before_prerun_ctx_; }


void SetNormalModeContext(rtContext_t context) { before_prerun_ctx_ = context; } void SetNormalModeContext(rtContext_t context) { before_prerun_ctx_ = context; }


void DestroyRtContexts(uint64_t session_id);
void DestroyAllRtContexts();
void DestroyrtContexts();


RtContextUtil &operator=(const RtContextUtil &) = delete; RtContextUtil &operator=(const RtContextUtil &) = delete;
RtContextUtil(const RtContextUtil &RtContextUtil) = delete; RtContextUtil(const RtContextUtil &RtContextUtil) = delete;
@@ -47,12 +44,8 @@ class RtContextUtil {
RtContextUtil() = default; RtContextUtil() = default;
~RtContextUtil() {} ~RtContextUtil() {}


void DestroyRtContexts(uint64_t session_id, std::vector<rtContext_t> &contexts);

std::map<uint64_t, std::vector<rtContext_t>> rt_contexts_;
std::vector<rtContext_t> rtContexts_;
rtContext_t before_prerun_ctx_ = nullptr; rtContext_t before_prerun_ctx_ = nullptr;

std::mutex ctx_mutex_;
}; };
} // namespace ge } // namespace ge




+ 0
- 32
src/ge/graph/optimize/graph_optimize.cc View File

@@ -299,36 +299,4 @@ void GraphOptimize::TranFrameOp(ComputeGraphPtr &compute_graph) {
} }
} }
} }

Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) {
for (auto &node : compute_graph->GetAllNodes()) {
GE_CHECK_NOTNULL(node);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
auto input_name_index = op_desc->GetAllInputName();
bool is_ref = false;
for (const auto &name_index : input_name_index) {
const int out_index = op_desc->GetOutputIndexByName(name_index.first);
if (out_index != -1) {
auto input_desc = op_desc->GetInputDesc(name_index.second);
input_desc.SetRefPortByIndex({name_index.second});
op_desc->UpdateInputDesc(name_index.second, input_desc);
GELOGI("SetRefPort: set op[%s] input desc[%u-%s] ref.", op_desc->GetName().c_str(), name_index.second,
name_index.first.c_str());
auto output_desc = op_desc->GetOutputDesc(static_cast<uint32_t>(out_index));
output_desc.SetRefPortByIndex({name_index.second});
op_desc->UpdateOutputDesc(static_cast<uint32_t>(out_index), output_desc);
GELOGI("SetRefPort: set op[%s] output desc[%u-%s] ref.", op_desc->GetName().c_str(), out_index,
name_index.first.c_str());
is_ref = true;
}
}
if (is_ref) {
AttrUtils::SetBool(op_desc, ATTR_NAME_REFERENCE, is_ref);
GELOGI("param [node] %s is reference node, set attribute %s to be true.", node->GetName().c_str(),
ATTR_NAME_REFERENCE.c_str());
}
}
return SUCCESS;
}
} // namespace ge } // namespace ge

+ 1
- 4
src/ge/graph/optimize/graph_optimize.h View File

@@ -67,9 +67,6 @@ class GraphOptimize {
// handle summary node before preRun graph // handle summary node before preRun graph
Status HandleSummaryOp(ComputeGraphPtr &compute_graph); Status HandleSummaryOp(ComputeGraphPtr &compute_graph);


// Identify reference node before optimize subgraph
Status IdentifyReference(ComputeGraphPtr &compute_graph);

void TranFrameOp(ComputeGraphPtr &compute_graph); void TranFrameOp(ComputeGraphPtr &compute_graph);


private: private:
@@ -88,5 +85,5 @@ class GraphOptimize {
std::map<uint32_t, std::map<string, size_t>> summary_output_indexes_ = {}; std::map<uint32_t, std::map<string, size_t>> summary_output_indexes_ = {};
std::string func_bin_path_; std::string func_bin_path_;
}; };
} // namespace ge
}; // namespace ge
#endif // GE_GRAPH_OPTIMIZE_GRAPH_OPTIMIZE_H_ #endif // GE_GRAPH_OPTIMIZE_GRAPH_OPTIMIZE_H_

+ 1
- 2
src/ge/graph/optimize/summary_optimize.cc View File

@@ -80,8 +80,7 @@ Status GraphOptimize::HandleSummaryOp(ComputeGraphPtr &compute_graph) {
del_nodes.emplace_back(node_ptr); del_nodes.emplace_back(node_ptr);
} }
} }
GE_IF_BOOL_EXEC(!summary_output_indexes.empty(),
summary_output_indexes_.insert({compute_graph->GetGraphID(), summary_output_indexes}));
summary_output_indexes_.insert({compute_graph->GetGraphID(), summary_output_indexes});


// add output nodes for summary // add output nodes for summary
std::vector<std::pair<NodePtr, int32_t>> out_nodes_info; std::vector<std::pair<NodePtr, int32_t>> out_nodes_info;


+ 52
- 55
src/ge/graph/partition/dynamic_shape_partition.cc View File

@@ -62,16 +62,15 @@ Status DynamicShapePartitioner::Partition() {
} }


GELOGD("Start dynamic shape partition graph %s.", root_graph_->GetName().c_str()); GELOGD("Start dynamic shape partition graph %s.", root_graph_->GetName().c_str());
REQUIRE_SUCCESS(MarkUnknownShapeNodes(), "Failed mark unknown shape nodes, root grah name:%s.",
root_graph_->GetName().c_str());
REQUIRE_SUCCESS(MarkUnknownShapeNodes(), "Failed mark unknown shape nodes.");
if (unknown_shape_nodes_.empty()) { if (unknown_shape_nodes_.empty()) {
GELOGD("Skip dynamic shape partition of graph %s as all nodes are known shape.", root_graph_->GetName().c_str()); GELOGD("Skip dynamic shape partition of graph %s as all nodes are known shape.", root_graph_->GetName().c_str());
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false), REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false),
"Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
"Failed set dynamic shape partitioned flag on root graph.");
return SUCCESS; return SUCCESS;
} }
REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true), REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true),
"Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
"Failed set dynamic shape partitioned flag on root graph.");


DumpGraph("_Before_DSP"); DumpGraph("_Before_DSP");
auto status = PartitionImpl(); auto status = PartitionImpl();
@@ -108,21 +107,21 @@ void DynamicShapePartitioner::PruneUniqueClusters() {
} }


Status DynamicShapePartitioner::BuildPartitionFrame() { Status DynamicShapePartitioner::BuildPartitionFrame() {
for (const auto &cluster : unique_clusters_) {
for (auto cluster : unique_clusters_) {
REQUIRE_SUCCESS(cluster->BuildFrame(), "Failed build frame of cluster[%lu].", cluster->Id()); REQUIRE_SUCCESS(cluster->BuildFrame(), "Failed build frame of cluster[%lu].", cluster->Id());
} }
return SUCCESS; return SUCCESS;
} }


Status DynamicShapePartitioner::CombinePartitionFrame() { Status DynamicShapePartitioner::CombinePartitionFrame() {
for (const auto &cluster : unique_clusters_) {
for (auto cluster : unique_clusters_) {
REQUIRE_SUCCESS(cluster->CombinePartitionFrame(), "Failed combine frame of cluster[%lu].", cluster->Id()); REQUIRE_SUCCESS(cluster->CombinePartitionFrame(), "Failed combine frame of cluster[%lu].", cluster->Id());
} }
return SUCCESS; return SUCCESS;
} }


Status DynamicShapePartitioner::BuildPartitionSubgraph() { Status DynamicShapePartitioner::BuildPartitionSubgraph() {
for (const auto &cluster : unique_clusters_) {
for (auto cluster : unique_clusters_) {
REQUIRE_SUCCESS(cluster->BuildPartitionSubgraph(), "Failed build subgraph of cluster[%lu].", cluster->Id()); REQUIRE_SUCCESS(cluster->BuildPartitionSubgraph(), "Failed build subgraph of cluster[%lu].", cluster->Id());
} }
return SUCCESS; return SUCCESS;
@@ -135,10 +134,10 @@ std::string DynamicShapePartitioner::DebugString() const {
size_t netoutput = 0; size_t netoutput = 0;
std::stringstream ss; std::stringstream ss;
ss << "All unknown shape nodes:" << std::endl; ss << "All unknown shape nodes:" << std::endl;
for (const auto &node : unknown_shape_nodes_) {
for (auto node : unknown_shape_nodes_) {
ss << " [" << node->GetName() << "](" << node->GetType() << ")" << std::endl; ss << " [" << node->GetName() << "](" << node->GetType() << ")" << std::endl;
} }
for (const auto &cluster : unique_clusters_) {
for (auto cluster : unique_clusters_) {
if (cluster->IsUnknownShape()) { if (cluster->IsUnknownShape()) {
unknown++; unknown++;
} else if (cluster->IsKnownShape()) { } else if (cluster->IsKnownShape()) {
@@ -151,7 +150,7 @@ std::string DynamicShapePartitioner::DebugString() const {
} }
ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known
<< ", unknown:" << unknown << ", netoutput:" << netoutput << std::endl; << ", unknown:" << unknown << ", netoutput:" << netoutput << std::endl;
for (const auto &cluster : unique_clusters_) {
for (auto cluster : unique_clusters_) {
ss << " " << cluster->DebugString() << std::endl; ss << " " << cluster->DebugString() << std::endl;
} }
return ss.str(); return ss.str();
@@ -159,13 +158,13 @@ std::string DynamicShapePartitioner::DebugString() const {


void DynamicShapePartitioner::DumpGraph(const std::string &suffix) { void DynamicShapePartitioner::DumpGraph(const std::string &suffix) {
GraphUtils::DumpGEGraphToOnnx(*root_graph_, root_graph_->GetName() + suffix); GraphUtils::DumpGEGraphToOnnx(*root_graph_, root_graph_->GetName() + suffix);
for (const auto &sub_graph : root_graph_->GetAllSubgraphs()) {
for (auto sub_graph : root_graph_->GetAllSubgraphs()) {
GraphUtils::DumpGEGraphToOnnx(*sub_graph, sub_graph->GetName() + suffix); GraphUtils::DumpGEGraphToOnnx(*sub_graph, sub_graph->GetName() + suffix);
} }
} }


void DynamicShapePartitioner::ClearResource() { void DynamicShapePartitioner::ClearResource() {
for (const auto &cluster : unique_clusters_) {
for (auto cluster : unique_clusters_) {
cluster->Clear(); cluster->Clear();
} }
node_2_cluster_.clear(); node_2_cluster_.clear();
@@ -176,7 +175,8 @@ void DynamicShapePartitioner::ClearResource() {
} }


Status DynamicShapePartitioner::MarkUnknownShapeNodes() { Status DynamicShapePartitioner::MarkUnknownShapeNodes() {
for (auto &node : root_graph_->GetDirectNode()) {
auto graph = root_graph_;
for (auto &node : graph->GetDirectNode()) {
REQUIRE_SUCCESS(CollectSpreadUnknownShapeNodes(node), "Failed collect spread unknown shape nodes %s.", REQUIRE_SUCCESS(CollectSpreadUnknownShapeNodes(node), "Failed collect spread unknown shape nodes %s.",
node->GetName().c_str()); node->GetName().c_str());
} }
@@ -186,7 +186,7 @@ Status DynamicShapePartitioner::MarkUnknownShapeNodes() {
Status DynamicShapePartitioner::InitClusters() { Status DynamicShapePartitioner::InitClusters() {
auto graph = root_graph_; auto graph = root_graph_;
size_t rank = 0; size_t rank = 0;
for (const auto &node : graph->GetDirectNode()) {
for (const auto node : graph->GetDirectNode()) {
Cluster::Type type = Cluster::DATA; Cluster::Type type = Cluster::DATA;
if (node->GetType() == DATA) { if (node->GetType() == DATA) {
type = Cluster::DATA; type = Cluster::DATA;
@@ -208,7 +208,7 @@ Status DynamicShapePartitioner::InitClusters() {
cluster->AddInput(node_2_cluster_[parent]); cluster->AddInput(node_2_cluster_[parent]);
} }
} }
for (const auto &node : graph->GetDirectNode()) {
for (const auto node : graph->GetDirectNode()) {
GELOGD("Make cluster for node %s : %s.", node->GetName().c_str(), node_2_cluster_[node]->DebugString().c_str()); GELOGD("Make cluster for node %s : %s.", node->GetName().c_str(), node_2_cluster_[node]->DebugString().c_str());
} }
return SUCCESS; return SUCCESS;
@@ -220,8 +220,8 @@ Status DynamicShapePartitioner::TopologicalSortClusters() {
std::queue<ClusterPtr> ready_clusters; std::queue<ClusterPtr> ready_clusters;
std::unordered_map<ClusterPtr, size_t> cluster_pending_count; std::unordered_map<ClusterPtr, size_t> cluster_pending_count;
std::unordered_set<ClusterPtr> seen_clusters; std::unordered_set<ClusterPtr> seen_clusters;
for (auto &iter : node_2_cluster_) {
auto cluster = iter.second;
for (auto iter = node_2_cluster_.begin(); iter != node_2_cluster_.end(); iter++) {
auto cluster = iter->second;
if (seen_clusters.count(cluster) != 0) { if (seen_clusters.count(cluster) != 0) {
continue; continue;
} }
@@ -242,7 +242,7 @@ Status DynamicShapePartitioner::TopologicalSortClusters() {
if (cluster->IsKnownShape()) { if (cluster->IsKnownShape()) {
ordered_cluster_.push_back(cluster); ordered_cluster_.push_back(cluster);
} }
for (const auto &out_cluster : cluster->Outputs()) {
for (auto out_cluster : cluster->Outputs()) {
if (cluster_pending_count[out_cluster] > 0 && --cluster_pending_count[out_cluster] == 0) { if (cluster_pending_count[out_cluster] > 0 && --cluster_pending_count[out_cluster] == 0) {
ready_clusters.push(out_cluster); ready_clusters.push(out_cluster);
} }
@@ -273,16 +273,16 @@ static std::string ToString(const std::vector<ClusterPtr> &clusters) {


Status DynamicShapePartitioner::MergeClusters() { Status DynamicShapePartitioner::MergeClusters() {
// Merge unknown shape clusters // Merge unknown shape clusters
for (const auto &cluster : ordered_cluster_) {
for (const auto &in_cluster : cluster->Inputs()) {
for (auto cluster : ordered_cluster_) {
for (auto in_cluster : cluster->Inputs()) {
if (!in_cluster->IsUnknownShape()) { if (!in_cluster->IsUnknownShape()) {
continue; continue;
} }
auto merged_clusters = cluster->MergeAllPathFrom(in_cluster); auto merged_clusters = cluster->MergeAllPathFrom(in_cluster);
GELOGD("Merge all path cluster from %lu to %lu %s.", in_cluster->Id(), cluster->Id(), GELOGD("Merge all path cluster from %lu to %lu %s.", in_cluster->Id(), cluster->Id(),
ToString(merged_clusters).c_str()); ToString(merged_clusters).c_str());
for (const auto &merged_cluster : merged_clusters) {
for (const auto &node : merged_cluster->Nodes()) {
for (auto merged_cluster : merged_clusters) {
for (auto node : merged_cluster->Nodes()) {
node_2_cluster_[node] = cluster; node_2_cluster_[node] = cluster;
} }
} }
@@ -291,7 +291,7 @@ Status DynamicShapePartitioner::MergeClusters() {


REQUIRE_SUCCESS(TopologicalSortClusters(), "Failed topological sort clusters after merge unknown shape clusters."); REQUIRE_SUCCESS(TopologicalSortClusters(), "Failed topological sort clusters after merge unknown shape clusters.");
// Merge known shape clusters // Merge known shape clusters
for (const auto &cluster : ordered_cluster_) {
for (auto cluster : ordered_cluster_) {
if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) { if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) {
auto in_cluster = *(cluster->Inputs().begin()); auto in_cluster = *(cluster->Inputs().begin());
in_cluster->Merge(cluster); in_cluster->Merge(cluster);
@@ -299,13 +299,13 @@ Status DynamicShapePartitioner::MergeClusters() {
continue; continue;
} }


for (const auto &in_cluster : cluster->Inputs()) {
for (auto in_cluster : cluster->Inputs()) {
if (!in_cluster->IsKnownShape()) { if (!in_cluster->IsKnownShape()) {
continue; continue;
} }
if (cluster->TryMerge(in_cluster)) { if (cluster->TryMerge(in_cluster)) {
GELOGD("Success merge known shape cluster from %lu to %lu.", in_cluster->Id(), cluster->Id()); GELOGD("Success merge known shape cluster from %lu to %lu.", in_cluster->Id(), cluster->Id());
for (const auto &node : in_cluster->Nodes()) {
for (auto node : in_cluster->Nodes()) {
node_2_cluster_[node] = cluster; node_2_cluster_[node] = cluster;
} }
} }
@@ -333,7 +333,7 @@ Status DynamicShapePartitioner::CollectSpreadUnknownShapeNodes(NodePtr node) {
if (IsUnknownShapeTensor(out_tensor)) { if (IsUnknownShapeTensor(out_tensor)) {
GELOGD("Collect node %s as unknown as output %lu is unknown.", node->GetName().c_str(), anchor_index); GELOGD("Collect node %s as unknown as output %lu is unknown.", node->GetName().c_str(), anchor_index);
is_unknown = true; is_unknown = true;
auto anchor = node->GetOutDataAnchor(static_cast<int>(anchor_index));
auto anchor = node->GetOutDataAnchor(anchor_index);
for (const auto peer_anchor : anchor->GetPeerInDataAnchors()) { for (const auto peer_anchor : anchor->GetPeerInDataAnchors()) {
if (peer_anchor != nullptr) { if (peer_anchor != nullptr) {
GELOGD("Collect node %s as has unknown input from %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(), GELOGD("Collect node %s as has unknown input from %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(),
@@ -349,7 +349,7 @@ Status DynamicShapePartitioner::CollectSpreadUnknownShapeNodes(NodePtr node) {
if (IsUnknownShapeTensor(in_tensor)) { if (IsUnknownShapeTensor(in_tensor)) {
GELOGD("Collect node %s as unknown as input %lu is unknown.", node->GetName().c_str(), anchor_index); GELOGD("Collect node %s as unknown as input %lu is unknown.", node->GetName().c_str(), anchor_index);
is_unknown = true; is_unknown = true;
auto anchor = node->GetInDataAnchor(static_cast<int>(anchor_index));
auto anchor = node->GetInDataAnchor(anchor_index);
const auto peer_anchor = anchor->GetPeerOutAnchor(); const auto peer_anchor = anchor->GetPeerOutAnchor();
if (peer_anchor != nullptr) { if (peer_anchor != nullptr) {
GELOGD("Collect node %s as has unknown output to %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(), GELOGD("Collect node %s as has unknown output to %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(),
@@ -453,15 +453,15 @@ std::string Cluster::DebugString() const {
} }
ss << "[" << id_ << "](size:" << nodes_.size() << ")"; ss << "[" << id_ << "](size:" << nodes_.size() << ")";
ss << "(" << min_ << "," << max_ << ")("; ss << "(" << min_ << "," << max_ << ")(";
for (const auto &cluster : in_clusters_) {
for (auto cluster : in_clusters_) {
ss << cluster->id_ << ","; ss << cluster->id_ << ",";
} }
ss << ")->("; ss << ")->(";
for (const auto &cluster : out_clusters_) {
for (auto cluster : out_clusters_) {
ss << cluster->id_ << ","; ss << cluster->id_ << ",";
} }
ss << ")|"; ss << ")|";
for (const auto &node : nodes_) {
for (auto node : nodes_) {
ss << (node->GetName() + "|"); ss << (node->GetName() + "|");
} }
return ss.str(); return ss.str();
@@ -507,12 +507,12 @@ void Cluster::Merge(ClusterPtr other) {
in_clusters_.erase(other); in_clusters_.erase(other);
out_clusters_.erase(other); out_clusters_.erase(other);
auto in_clusters = other->in_clusters_; auto in_clusters = other->in_clusters_;
for (const auto &cluster : in_clusters) {
for (auto cluster : in_clusters) {
cluster->RemoveOutput(other); cluster->RemoveOutput(other);
cluster->AddOutput(shared_from_this()); cluster->AddOutput(shared_from_this());
} }
auto out_clusters = other->out_clusters_; auto out_clusters = other->out_clusters_;
for (const auto &cluster : out_clusters) {
for (auto cluster : out_clusters) {
cluster->RemoveInput(other); cluster->RemoveInput(other);
cluster->AddInput(shared_from_this()); cluster->AddInput(shared_from_this());
} }
@@ -529,7 +529,7 @@ bool Cluster::TryMerge(ClusterPtr other) {
while (!forward_reached.empty()) { while (!forward_reached.empty()) {
auto current_cluster = forward_reached.front(); auto current_cluster = forward_reached.front();
forward_reached.pop(); forward_reached.pop();
for (const auto &cluster : current_cluster->out_clusters_) {
for (auto cluster : current_cluster->out_clusters_) {
if (cluster->max_ == max_ && current_cluster != other) { if (cluster->max_ == max_ && current_cluster != other) {
return false; return false;
} else if (cluster->min_ < max_) { } else if (cluster->min_ < max_) {
@@ -557,7 +557,7 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) {
while (!forward_reached_queue.empty()) { while (!forward_reached_queue.empty()) {
auto current_cluster = forward_reached_queue.front(); auto current_cluster = forward_reached_queue.front();
forward_reached_queue.pop(); forward_reached_queue.pop();
for (const auto &cluster : current_cluster->out_clusters_) {
for (auto cluster : current_cluster->out_clusters_) {
if (cluster->min_ < max_ && cluster->max_ != max_ && forward_reached_clusters.count(cluster) == 0) { if (cluster->min_ < max_ && cluster->max_ != max_ && forward_reached_clusters.count(cluster) == 0) {
forward_reached_clusters.insert(cluster); forward_reached_clusters.insert(cluster);
forward_reached_queue.push(cluster); forward_reached_queue.push(cluster);
@@ -567,7 +567,7 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) {
while (!backward_reached_queue.empty()) { while (!backward_reached_queue.empty()) {
auto current_cluster = backward_reached_queue.front(); auto current_cluster = backward_reached_queue.front();
backward_reached_queue.pop(); backward_reached_queue.pop();
for (const auto &cluster : current_cluster->in_clusters_) {
for (auto cluster : current_cluster->in_clusters_) {
if (cluster->max_ > other->min_ && cluster->max_ != other->max_ && if (cluster->max_ > other->min_ && cluster->max_ != other->max_ &&
backward_reached_clusters.count(cluster) == 0) { backward_reached_clusters.count(cluster) == 0) {
backward_reached_clusters.insert(cluster); backward_reached_clusters.insert(cluster);
@@ -578,7 +578,7 @@ std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) {
} }
} }
} }
for (const auto &cluster : path_clusters) {
for (auto cluster : path_clusters) {
Merge(cluster); Merge(cluster);
} }
return path_clusters; return path_clusters;
@@ -598,11 +598,11 @@ void Cluster::AddFrameOutput(OutDataAnchorPtr anchor) {
}; };


InDataAnchorPtr Cluster::GetFrameInDataAnchor(InDataAnchorPtr anchor) { InDataAnchorPtr Cluster::GetFrameInDataAnchor(InDataAnchorPtr anchor) {
return partition_node_->GetInDataAnchor(static_cast<int>(inputs_index_[anchor]));
return partition_node_->GetInDataAnchor(inputs_index_[anchor]);
}; };


OutDataAnchorPtr Cluster::GetFrameOutDataAnchor(OutDataAnchorPtr anchor) { OutDataAnchorPtr Cluster::GetFrameOutDataAnchor(OutDataAnchorPtr anchor) {
return partition_node_->GetOutDataAnchor(static_cast<int>(outputs_index_[anchor]));
return partition_node_->GetOutDataAnchor(outputs_index_[anchor]);
}; };


InControlAnchorPtr Cluster::GetFrameInControlAnchor() { return partition_node_->GetInControlAnchor(); }; InControlAnchorPtr Cluster::GetFrameInControlAnchor() { return partition_node_->GetInControlAnchor(); };
@@ -616,25 +616,22 @@ Status Cluster::BuildFrame() {
auto node = nodes_.front(); auto node = nodes_.front();
auto in_control_anchor = node->GetInControlAnchor(); auto in_control_anchor = node->GetInControlAnchor();
if (in_control_anchor != nullptr) { if (in_control_anchor != nullptr) {
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
for (auto peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()]; auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()];
if (src_cluster->id_ != id_) { if (src_cluster->id_ != id_) {
REQUIRE_GRAPH_SUCCESS(
GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor),
"Failed remove edge from node %s index %d to node %s index %d.",
peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(peer_out_control_anchor),
in_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(in_control_anchor));
auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()];
GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor);
control_inputs_.insert(src_cluster); control_inputs_.insert(src_cluster);
src_cluster->control_outputs_.insert(peer_out_control_anchor); src_cluster->control_outputs_.insert(peer_out_control_anchor);
} }
} }
} }
if (IsData()) { if (IsData()) {
for (const auto &anchor : node->GetAllOutDataAnchors()) {
for (auto anchor : node->GetAllOutDataAnchors()) {
AddFrameOutput(anchor); AddFrameOutput(anchor);
} }
} else { } else {
for (const auto &anchor : node->GetAllInDataAnchors()) {
for (auto anchor : node->GetAllInDataAnchors()) {
AddFrameInput(anchor); AddFrameInput(anchor);
} }
} }
@@ -663,7 +660,7 @@ Status Cluster::BuildPartitionFrame() {
"Failed set shape flag."); "Failed set shape flag.");
REQUIRE_GRAPH_SUCCESS(GraphUtils::RemoveJustNode(graph, node), "Failed remove root graph node."); REQUIRE_GRAPH_SUCCESS(GraphUtils::RemoveJustNode(graph, node), "Failed remove root graph node.");
REQUIRE_GRAPH_SUCCESS(node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph."); REQUIRE_GRAPH_SUCCESS(node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph.");
for (const auto &anchor : node->GetAllInDataAnchors()) {
for (auto anchor : node->GetAllInDataAnchors()) {
auto peer_out_anchor = anchor->GetPeerOutAnchor(); auto peer_out_anchor = anchor->GetPeerOutAnchor();
if (peer_out_anchor == nullptr) { if (peer_out_anchor == nullptr) {
continue; // Skip overhang input. continue; // Skip overhang input.
@@ -677,7 +674,7 @@ Status Cluster::BuildPartitionFrame() {
} }
auto in_control_anchor = node->GetInControlAnchor(); auto in_control_anchor = node->GetInControlAnchor();
if (in_control_anchor != nullptr) { if (in_control_anchor != nullptr) {
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
for (auto peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
if (peer_out_control_anchor == nullptr) { if (peer_out_control_anchor == nullptr) {
continue; continue;
} }
@@ -692,9 +689,9 @@ Status Cluster::BuildPartitionFrame() {
} }
} }
} }
for (const auto &anchor : node->GetAllOutDataAnchors()) {
for (auto anchor : node->GetAllOutDataAnchors()) {
auto peer_in_anchors = anchor->GetPeerInDataAnchors(); auto peer_in_anchors = anchor->GetPeerInDataAnchors();
for (const auto &peer_in_anchor : peer_in_anchors) {
for (auto peer_in_anchor : peer_in_anchors) {
auto src_cluster = partitioner_->node_2_cluster_[peer_in_anchor->GetOwnerNode()]; auto src_cluster = partitioner_->node_2_cluster_[peer_in_anchor->GetOwnerNode()];
if (src_cluster->id_ != id_) { if (src_cluster->id_ != id_) {
AddFrameOutput(anchor); AddFrameOutput(anchor);
@@ -720,7 +717,7 @@ Status Cluster::BuildPartitionFrame() {
} }


Status Cluster::CombinePartitionFrame() { Status Cluster::CombinePartitionFrame() {
for (const auto &anchor : inputs_) {
for (auto anchor : inputs_) {
auto peer_out_anchor = anchor->GetPeerOutAnchor(); auto peer_out_anchor = anchor->GetPeerOutAnchor();
auto src_cluster = partitioner_->node_2_cluster_[peer_out_anchor->GetOwnerNode()]; auto src_cluster = partitioner_->node_2_cluster_[peer_out_anchor->GetOwnerNode()];
auto src_anchor = src_cluster->GetFrameOutDataAnchor(peer_out_anchor); auto src_anchor = src_cluster->GetFrameOutDataAnchor(peer_out_anchor);
@@ -732,7 +729,7 @@ Status Cluster::CombinePartitionFrame() {
src_anchor->GetOwnerNode()->GetName().c_str(), src_anchor->GetIdx(), src_anchor->GetOwnerNode()->GetName().c_str(), src_anchor->GetIdx(),
dst_anchor->GetOwnerNode()->GetName().c_str(), dst_anchor->GetIdx()); dst_anchor->GetOwnerNode()->GetName().c_str(), dst_anchor->GetIdx());
} }
for (const auto &src_cluster : control_inputs_) {
for (auto src_cluster : control_inputs_) {
auto src_anchor = src_cluster->GetFrameOutControlAnchor(); auto src_anchor = src_cluster->GetFrameOutControlAnchor();
auto dst_anchor = GetFrameInControlAnchor(); auto dst_anchor = GetFrameInControlAnchor();
REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(src_anchor, dst_anchor), "Failed add edge from %s:%d to %s:%d.", REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(src_anchor, dst_anchor), "Failed add edge from %s:%d to %s:%d.",
@@ -777,8 +774,8 @@ Status Cluster::BuildPartitionSubgraph() {
REQUIRE_NOT_NULL(net_output_node, "Failed add netoutput node to subgraph."); REQUIRE_NOT_NULL(net_output_node, "Failed add netoutput node to subgraph.");
REQUIRE_GRAPH_SUCCESS(net_output_node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph of netoutput node."); REQUIRE_GRAPH_SUCCESS(net_output_node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph of netoutput node.");
parent_node_index = 0; parent_node_index = 0;
for (const auto &anchor : outputs_) {
auto output_desc = anchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(static_cast<uint32_t>(anchor->GetIdx()));
for (auto anchor : outputs_) {
auto output_desc = anchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(anchor->GetIdx());
REQUIRE(AttrUtils::SetInt(output_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_node_index), REQUIRE(AttrUtils::SetInt(output_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_node_index),
"Failed set parent_node_index on subgraph netoutput's input."); "Failed set parent_node_index on subgraph netoutput's input.");
REQUIRE_GRAPH_SUCCESS(net_output_op->UpdateInputDesc(parent_node_index, output_desc), REQUIRE_GRAPH_SUCCESS(net_output_op->UpdateInputDesc(parent_node_index, output_desc),
@@ -789,7 +786,7 @@ Status Cluster::BuildPartitionSubgraph() {
anchor->GetIdx()); anchor->GetIdx());
parent_node_index++; parent_node_index++;
} }
for (const auto &anchor : control_outputs_) {
for (auto anchor : control_outputs_) {
REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(anchor, net_output_node->GetInControlAnchor()), REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(anchor, net_output_node->GetInControlAnchor()),
"Faile add control edge from %s:%d to netoutput node.", "Faile add control edge from %s:%d to netoutput node.",
anchor->GetOwnerNode()->GetName().c_str(), anchor->GetIdx()); anchor->GetOwnerNode()->GetName().c_str(), anchor->GetIdx());


+ 1
- 5
src/ge/graph/partition/engine_place.cc View File

@@ -38,7 +38,6 @@ Status EnginePlacer::Run() {
return FAILED; return FAILED;
} }
// Assign engine for each node in the graph // Assign engine for each node in the graph
instance_ptr->DNNEngineManagerObj().InitPerformanceStaistic();
for (const auto &node_ptr : compute_graph_->GetDirectNode()) { for (const auto &node_ptr : compute_graph_->GetDirectNode()) {
GE_CHECK_NOTNULL(node_ptr); GE_CHECK_NOTNULL(node_ptr);
GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); GE_CHECK_NOTNULL(node_ptr->GetOpDesc());
@@ -61,15 +60,12 @@ Status EnginePlacer::Run() {
return FAILED; return FAILED;
} }
} }
for (auto &it : instance_ptr->DNNEngineManagerObj().GetCheckSupportCost()) {
GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second);
}
GELOGI("Engine placer ends."); GELOGI("Engine placer ends.");
return SUCCESS; return SUCCESS;
} }


Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::string &engine_name) { Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::string &engine_name) {
if ((node_ptr == nullptr) || (node_ptr->GetOpDesc() == nullptr)) {
if (node_ptr == nullptr || node_ptr->GetOpDesc() == nullptr) {
GELOGE(FAILED, "node_ptr is null."); GELOGE(FAILED, "node_ptr is null.");
return FAILED; return FAILED;
} }


+ 32
- 35
src/ge/graph/partition/graph_partition.cc View File

@@ -25,7 +25,6 @@
#include "framework/common/types.h" #include "framework/common/types.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
#include "graph/manager/graph_manager_utils.h" #include "graph/manager/graph_manager_utils.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/utils/graph_utils.h" #include "graph/utils/graph_utils.h"
#include "graph/utils/op_desc_utils.h" #include "graph/utils/op_desc_utils.h"
#include "graph/utils/type_utils.h" #include "graph/utils/type_utils.h"
@@ -232,33 +231,33 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co
ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>(original_compute_graph->GetName()); ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>(original_compute_graph->GetName());
GE_CHECK_NOTNULL(new_sub_graph); GE_CHECK_NOTNULL(new_sub_graph);
output_merged_compute_graph = new_sub_graph; output_merged_compute_graph = new_sub_graph;
GE_TIMESTAMP_START(MergeSubGraphRemoveNode);
GE_TIMESTAMP_START(MergeGraphRemoveNode);
if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) { if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) {
GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed"); GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed");
return FAILED; return FAILED;
} }
GE_TIMESTAMP_END(MergeSubGraphRemoveNode, "GraphPartitioner::MergeGraphRemoveNodeAndEdge");
GE_TIMESTAMP_START(MergeSubGraphTopologicalSorting);
GE_TIMESTAMP_END(MergeGraphRemoveNode, "GraphPartitioner::MergeGraphRemoveNodeAndEdge");
GE_TIMESTAMP_START(MergeGraphTopologicalSorting);
Status ret = output_merged_compute_graph->TopologicalSorting(); Status ret = output_merged_compute_graph->TopologicalSorting();
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[GraphPartitioner]: output_merged_compute_graph->TopologicalSorting failed"); GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[GraphPartitioner]: output_merged_compute_graph->TopologicalSorting failed");
return FAILED; return FAILED;
} }
GE_TIMESTAMP_END(MergeSubGraphTopologicalSorting, "GraphPartitioner::MergeGraphTopologicalSorting");
GE_TIMESTAMP_END(MergeGraphTopologicalSorting, "GraphPartitioner::MergeGraphTopologicalSorting");
// flush all nodes' engine of merged graph // flush all nodes' engine of merged graph
GE_TIMESTAMP_START(MergeSubGraphEnginePlacerRun);
GE_TIMESTAMP_START(MergeGraphEnginePlacerRun);
graph_info_.engine_placer_.SetComputeGraph(output_merged_compute_graph); graph_info_.engine_placer_.SetComputeGraph(output_merged_compute_graph);
if (graph_info_.engine_placer_.Run() != SUCCESS) { if (graph_info_.engine_placer_.Run() != SUCCESS) {
GELOGE(GE_GRAPH_INIT_FAILED, "[GraphPartitioner]: engine_placer run failed"); GELOGE(GE_GRAPH_INIT_FAILED, "[GraphPartitioner]: engine_placer run failed");
return FAILED; return FAILED;
} }
GE_TIMESTAMP_END(MergeSubGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun");
GE_TIMESTAMP_END(MergeGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun");
GELOGI("Graph merge ends."); GELOGI("Graph merge ends.");
return SUCCESS; return SUCCESS;
} }


Status ge::GraphPartitioner::UpdatePldOpDesc(const NodePtr &dst_node, int input_index, OpDescPtr &pld_op_desc) { Status ge::GraphPartitioner::UpdatePldOpDesc(const NodePtr &dst_node, int input_index, OpDescPtr &pld_op_desc) {
if ((dst_node == nullptr) || (pld_op_desc == nullptr) || (dst_node->GetOpDesc() == nullptr)) {
if (dst_node == nullptr || pld_op_desc == nullptr || dst_node->GetOpDesc() == nullptr) {
GELOGE(FAILED, "parameter ptr is null."); GELOGE(FAILED, "parameter ptr is null.");
return FAILED; return FAILED;
} }
@@ -276,7 +275,7 @@ Status ge::GraphPartitioner::UpdatePldOpDesc(const NodePtr &dst_node, int input_
} }


Status ge::GraphPartitioner::UpdateEndOpDesc(const NodePtr &src_node, int output_index, OpDescPtr &end_op_desc) { Status ge::GraphPartitioner::UpdateEndOpDesc(const NodePtr &src_node, int output_index, OpDescPtr &end_op_desc) {
if ((src_node == nullptr) || (end_op_desc == nullptr) || (src_node->GetOpDesc() == nullptr)) {
if (src_node == nullptr || end_op_desc == nullptr || src_node->GetOpDesc() == nullptr) {
GELOGE(FAILED, "parameter ptr is null."); GELOGE(FAILED, "parameter ptr is null.");
return FAILED; return FAILED;
} }
@@ -297,9 +296,9 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
const AnchorPtr &peer_in_anchor, const AnchorPtr &peer_in_anchor,
const ge::ComputeGraphPtr &pld_graph, const ge::ComputeGraphPtr &pld_graph,
const ge::ComputeGraphPtr &end_graph) { const ge::ComputeGraphPtr &end_graph) {
GE_CHECK_NOTNULL(out_anchor);
GE_CHECK_NOTNULL(peer_in_anchor); GE_CHECK_NOTNULL(peer_in_anchor);
GE_CHECK_NOTNULL(pld_graph); GE_CHECK_NOTNULL(pld_graph);
GE_CHECK_NOTNULL(out_anchor);
GE_CHECK_NOTNULL(end_graph); GE_CHECK_NOTNULL(end_graph);
const auto &src_node = out_anchor->GetOwnerNode(); const auto &src_node = out_anchor->GetOwnerNode();
const auto &dst_node = peer_in_anchor->GetOwnerNode(); const auto &dst_node = peer_in_anchor->GetOwnerNode();
@@ -314,7 +313,6 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
GELOGW("SetInt peerIndex failed");) GELOGW("SetInt peerIndex failed");)
GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, "parentOpType", dst_node->GetType()), GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, "parentOpType", dst_node->GetType()),
GELOGW("SetStr parentOpType failed");) GELOGW("SetStr parentOpType failed");)
GE_IF_BOOL_EXEC(!end_op_desc->SetExtAttr("parentNode", dst_node), GELOGW("SetEndExtAttr parentNode failed");)
// replace input_desc of end with owner node's desc // replace input_desc of end with owner node's desc
int output_index = ge::AnchorUtils::GetIdx(out_anchor); int output_index = ge::AnchorUtils::GetIdx(out_anchor);
bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning);
@@ -363,7 +361,6 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
GELOGW("SetStr parentId failed");) GELOGW("SetStr parentId failed");)
GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "anchorIndex", AnchorUtils::GetIdx(out_anchor)), GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "anchorIndex", AnchorUtils::GetIdx(out_anchor)),
GELOGW("SetInt anchorIndex failed");) GELOGW("SetInt anchorIndex failed");)
GE_IF_BOOL_EXEC(!pld_op_desc->SetExtAttr("parentNode", src_node), GELOGW("SetPldExtAttr parentNode failed");)
// do not care over flow // do not care over flow
graph_info_.num_of_pld_end_++; graph_info_.num_of_pld_end_++;
// replace output_desc of pld with input node's output desc // replace output_desc of pld with input node's output desc
@@ -398,14 +395,14 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
return FAILED; return FAILED;
} }
graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node; graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node;
graph_info_.pld_2_end_[new_pld_node] = new_end_node;
graph_info_.end_2_pld_[new_end_node] = new_pld_node; graph_info_.end_2_pld_[new_end_node] = new_pld_node;
graph_info_.pld_2_end_[new_pld_node] = new_end_node;
return SUCCESS; return SUCCESS;
} }


Status ge::GraphPartitioner::LinkInput2EndRemoveOrginalLink(ge::NodePtr input_node, ge::ComputeGraphPtr src_graph, Status ge::GraphPartitioner::LinkInput2EndRemoveOrginalLink(ge::NodePtr input_node, ge::ComputeGraphPtr src_graph,
ge::ComputeGraphPtr dst_graph) { ge::ComputeGraphPtr dst_graph) {
if ((input_node == nullptr) || (src_graph == nullptr) || (dst_graph == nullptr)) {
if (input_node == nullptr || src_graph == nullptr || dst_graph == nullptr) {
GELOGE(FAILED, "parameter ptr is null."); GELOGE(FAILED, "parameter ptr is null.");
return FAILED; return FAILED;
} }
@@ -445,7 +442,7 @@ Status ge::GraphPartitioner::LinkInput2EndRemoveOrginalLink(ge::NodePtr input_no


Status ge::GraphPartitioner::PutInputNodesInSubGraph(const ge::ComputeGraphPtr &src_graph, Status ge::GraphPartitioner::PutInputNodesInSubGraph(const ge::ComputeGraphPtr &src_graph,
const ge::ComputeGraphPtr &dst_graph) { const ge::ComputeGraphPtr &dst_graph) {
if ((src_graph == nullptr) || (dst_graph == nullptr)) {
if (src_graph == nullptr || dst_graph == nullptr) {
GELOGE(FAILED, "parameter ptr is null."); GELOGE(FAILED, "parameter ptr is null.");
return FAILED; return FAILED;
} }
@@ -852,34 +849,34 @@ Status ge::GraphPartitioner::PartitionSubGraph(ge::ComputeGraphPtr compute_graph
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[GraphPartitioner]: subGraphPtr->TopologicalSorting failed"); GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[GraphPartitioner]: subGraphPtr->TopologicalSorting failed");
return FAILED; return FAILED;
} }
GE_TIMESTAMP_START(PartitionSubGraphInitialize);
GE_TIMESTAMP_START(GraphPartitionInitialize);
if (Initialize(compute_graph) != SUCCESS) { if (Initialize(compute_graph) != SUCCESS) {
GELOGE(GE_GRAPH_INIT_FAILED, "[GraphPartitioner]: initialize failed"); GELOGE(GE_GRAPH_INIT_FAILED, "[GraphPartitioner]: initialize failed");
return FAILED; return FAILED;
} }
GE_TIMESTAMP_END(PartitionSubGraphInitialize, "GraphPartitioner::PartitionInitialize");
GE_TIMESTAMP_START(PartitionSubGraphMarkClusters);
GE_TIMESTAMP_END(GraphPartitionInitialize, "GraphPartitioner::PartitionInitialize");
GE_TIMESTAMP_START(GraphPartitionMarkClusters);
MarkClusters(); MarkClusters();
GE_TIMESTAMP_END(PartitionSubGraphMarkClusters, "GraphPartitioner::PartitionMarkClusters");
GE_TIMESTAMP_START(PartitionSubGraphSplitSubGraphs);
GE_TIMESTAMP_END(GraphPartitionMarkClusters, "GraphPartitioner::PartitionMarkClusters");
GE_TIMESTAMP_START(GraphPartitionSplitSubGraphs);
if (SplitSubGraphs(compute_graph) != SUCCESS) { if (SplitSubGraphs(compute_graph) != SUCCESS) {
GELOGE(FAILED, "[GraphPartitioner]: SplitSubGraphs failed"); GELOGE(FAILED, "[GraphPartitioner]: SplitSubGraphs failed");
return FAILED; return FAILED;
} }
GE_TIMESTAMP_END(PartitionSubGraphSplitSubGraphs, "GraphPartitioner::PartitionSplitSubGraphs");
GE_TIMESTAMP_START(PartitionSubGraphSortSubGraphs);
GE_TIMESTAMP_END(GraphPartitionSplitSubGraphs, "GraphPartitioner::PartitionSplitSubGraphs");
GE_TIMESTAMP_START(GraphPartitionSortSubGraphs);
if (SortSubGraphs(compute_graph) != ge::SUCCESS) { if (SortSubGraphs(compute_graph) != ge::SUCCESS) {
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "Graph Partition SortSubGraphs failed."); GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "Graph Partition SortSubGraphs failed.");
return ge::FAILED; return ge::FAILED;
} }
GE_TIMESTAMP_END(PartitionSubGraphSortSubGraphs, "GraphPartitioner::PartitionSortSubGraphs");
GE_TIMESTAMP_START(PartitionSubGraphAddPartitionsToGraphNode);
GE_TIMESTAMP_END(GraphPartitionSortSubGraphs, "GraphPartitioner::PartitionSortSubGraphs");
GE_TIMESTAMP_START(GraphPartitionAddPartitionsToGraphNode);
vector<ge::SubGraphInfoPtr> output_subgraphs; vector<ge::SubGraphInfoPtr> output_subgraphs;
if (AddPartitionsToGraphNode(output_subgraphs, compute_graph) != ge::SUCCESS) { if (AddPartitionsToGraphNode(output_subgraphs, compute_graph) != ge::SUCCESS) {
GELOGE(GE_GRAPH_EMPTY_PARTITION, "Graph Partition AddPartitionsToGraphNode failed."); GELOGE(GE_GRAPH_EMPTY_PARTITION, "Graph Partition AddPartitionsToGraphNode failed.");
return ge::FAILED; return ge::FAILED;
} }
GE_TIMESTAMP_END(PartitionSubGraphAddPartitionsToGraphNode, "GraphPartitioner::PartitionAddPartitionsToGraphNode");
GE_TIMESTAMP_END(GraphPartitionAddPartitionsToGraphNode, "GraphPartitioner::PartitionAddPartitionsToGraphNode");
GELOGI("Graph Partition ends. Adding partitions to SubGraphInfo, got %zu sub graphs", output_subgraphs.size()); GELOGI("Graph Partition ends. Adding partitions to SubGraphInfo, got %zu sub graphs", output_subgraphs.size());
graph_info_.mode_ = kMerging; graph_info_.mode_ = kMerging;
// do not care over flow // do not care over flow
@@ -926,7 +923,7 @@ Status ge::GraphPartitioner::AddPlaceHolderEnd(const AnchorPtr &out_anchor, cons
Status ge::GraphPartitioner::SortSubGraphs(const ge::ComputeGraphPtr &compute_graph) { Status ge::GraphPartitioner::SortSubGraphs(const ge::ComputeGraphPtr &compute_graph) {
uint32_t rank = kRankOne; // rank 0 for data graph uint32_t rank = kRankOne; // rank 0 for data graph
ComputeGraphPtr new_input_nodes_sub_graph = MakeShared<ComputeGraph>("inputNodeGraph"); ComputeGraphPtr new_input_nodes_sub_graph = MakeShared<ComputeGraph>("inputNodeGraph");
if ((new_input_nodes_sub_graph == nullptr) || (compute_graph == nullptr)) {
if (new_input_nodes_sub_graph == nullptr || compute_graph == nullptr) {
GELOGE(FAILED, "[GraphPartitioner]: new_input_nodes_sub_graph or compute_graph is null."); GELOGE(FAILED, "[GraphPartitioner]: new_input_nodes_sub_graph or compute_graph is null.");
return FAILED; return FAILED;
} }
@@ -968,7 +965,7 @@ Status ge::GraphPartitioner::SortSubGraphs(const ge::ComputeGraphPtr &compute_gr
} }


AnchorPtr ge::GraphPartitioner::GetEndInAnchor(const AnchorPtr &src_anchor, const NodePtr &end_node) { AnchorPtr ge::GraphPartitioner::GetEndInAnchor(const AnchorPtr &src_anchor, const NodePtr &end_node) {
if ((src_anchor == nullptr) || (end_node == nullptr)) {
if (src_anchor == nullptr || end_node == nullptr) {
GELOGE(FAILED, "parameter ptr is null."); GELOGE(FAILED, "parameter ptr is null.");
return nullptr; return nullptr;
} }
@@ -982,7 +979,7 @@ AnchorPtr ge::GraphPartitioner::GetEndInAnchor(const AnchorPtr &src_anchor, cons
} }


AnchorPtr ge::GraphPartitioner::GetPldOutAnchor(const NodePtr &pld_node, const AnchorPtr &dst_anchor) { AnchorPtr ge::GraphPartitioner::GetPldOutAnchor(const NodePtr &pld_node, const AnchorPtr &dst_anchor) {
if ((pld_node == nullptr) || (dst_anchor == nullptr)) {
if (pld_node == nullptr || dst_anchor == nullptr) {
GELOGE(FAILED, "parameter ptr is null."); GELOGE(FAILED, "parameter ptr is null.");
return nullptr; return nullptr;
} }
@@ -995,16 +992,16 @@ AnchorPtr ge::GraphPartitioner::GetPldOutAnchor(const NodePtr &pld_node, const A
return pld_out_anchor; return pld_out_anchor;
} }


void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPtr &subgraph_info) {
if (subgraph_info == nullptr) {
void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPtr &sub_graph_info) {
if (sub_graph_info == nullptr) {
GELOGE(FAILED, "parameter ptr is null."); GELOGE(FAILED, "parameter ptr is null.");
return; return;
} }
auto subgraph = subgraph_info->GetSubGraph();
GE_CHECK_NOTNULL_JUST_RETURN(subgraph);
auto sub_graph = sub_graph_info->GetSubGraph();
GE_CHECK_NOTNULL_JUST_RETURN(sub_graph);
NodetoNodeMap end_map; NodetoNodeMap end_map;
NodetoNodeMap pld_map; NodetoNodeMap pld_map;
for (const auto &node : subgraph->GetDirectNode()) {
for (const auto &node : sub_graph->GetDirectNode()) {
if (node->GetType() == kEndType) { if (node->GetType() == kEndType) {
end_map[node] = graph_info_.end_2_pld_.at(node); end_map[node] = graph_info_.end_2_pld_.at(node);
} }
@@ -1012,8 +1009,8 @@ void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPt
pld_map[node] = graph_info_.pld_2_end_.at(node); pld_map[node] = graph_info_.pld_2_end_.at(node);
} }
} }
subgraph_info->SetEnd2PldMap(end_map);
subgraph_info->SetPld2EndMap(pld_map);
sub_graph_info->SetEnd2PldMap(end_map);
sub_graph_info->SetPld2EndMap(pld_map);
} }


const Graph2SubGraphInfoList &ge::GraphPartitioner::GetSubGraphMap() { return graph_2_subgraph_list_; } const Graph2SubGraphInfoList &ge::GraphPartitioner::GetSubGraphMap() { return graph_2_subgraph_list_; }


+ 15
- 11
src/ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -22,12 +22,16 @@
#include <sstream> #include <sstream>
#include <vector> #include <vector>


#include "framework/common/debug/ge_log.h"
#include "common/ge_inner_error_codes.h" #include "common/ge_inner_error_codes.h"
#include "common/ge/ge_util.h" #include "common/ge/ge_util.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
#include "graph/utils/node_utils.h" #include "graph/utils/node_utils.h"
#include "init/gelib.h" #include "init/gelib.h"


namespace {
bool is_loop_graph = false;
}
namespace ge { namespace ge {
namespace { namespace {
bool GraphShouldBeSkip(const ge::ComputeGraphPtr &graph) { bool GraphShouldBeSkip(const ge::ComputeGraphPtr &graph) {
@@ -40,6 +44,7 @@ bool GraphShouldBeSkip(const ge::ComputeGraphPtr &graph) {
} // namespace } // namespace


Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) {
GE_TIMESTAMP_START(AtomicAddrCleanPass);
if (graph == nullptr) { if (graph == nullptr) {
GELOGE(PARAM_INVALID, "param [graph] must not be null."); GELOGE(PARAM_INVALID, "param [graph] must not be null.");
return PARAM_INVALID; return PARAM_INVALID;
@@ -66,10 +71,10 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) {
} }
atomic_node_vec.push_back(node); atomic_node_vec.push_back(node);
} }
if (!is_loop_graph_ && node->GetType() == LOOPCOND) {
if (!is_loop_graph && node->GetType() == LOOPCOND) {
// there is loop in this graph // there is loop in this graph
GELOGD("There is no loop node. It will insert clean node follow atomic node."); GELOGD("There is no loop node. It will insert clean node follow atomic node.");
is_loop_graph_ = true;
is_loop_graph = true;
} }
} }
if (atomic_node_vec.empty()) { if (atomic_node_vec.empty()) {
@@ -78,7 +83,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) {
} }
// 2.Insert clean node and link to atomic node // 2.Insert clean node and link to atomic node
Status ret; Status ret;
if (is_loop_graph_) {
if (is_loop_graph) {
ret = HandleLoopGraph(graph, atomic_node_vec); ret = HandleLoopGraph(graph, atomic_node_vec);
if (ret != SUCCESS) { if (ret != SUCCESS) {
return ret; return ret;
@@ -90,6 +95,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) {
} }
} }
GELOGD("AtomicAddrCleanPass end."); GELOGD("AtomicAddrCleanPass end.");
GE_TIMESTAMP_END(AtomicAddrCleanPass, "GraphManager::AtomicAddrCleanPass");
return SUCCESS; return SUCCESS;
} }


@@ -166,14 +172,12 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) {
if (!session_graph_id.empty()) { if (!session_graph_id.empty()) {
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); (void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
} }
string node_name = op_desc->GetName();
// Only flush subgraph name // Only flush subgraph name
if (graph->GetParentGraph() != nullptr) {
node_name = graph->GetName() + "_" + node_name;
}
string node_name = (graph->GetParentGraph() != nullptr)
? (graph->GetName() + "_" + op_desc->GetName() + session_graph_id)
: (op_desc->GetName() + session_graph_id);


string name = node_name + session_graph_id;
op_desc->SetName(name);
op_desc->SetName(node_name);
GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str()); GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str());
// To avoid same name between graphs, set session graph id to this node // To avoid same name between graphs, set session graph id to this node
NodePtr clean_addr_node = graph->AddNodeFront(op_desc); NodePtr clean_addr_node = graph->AddNodeFront(op_desc);
@@ -199,7 +203,7 @@ Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr
} }
GELOGD("Graph add cleanAddrNode op out ctrl edge, dst node: %s.", atomic_node->GetName().c_str()); GELOGD("Graph add cleanAddrNode op out ctrl edge, dst node: %s.", atomic_node->GetName().c_str());
std::string stream_label; std::string stream_label;
if (is_loop_graph_ && AttrUtils::GetStr(atomic_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) {
if (is_loop_graph && AttrUtils::GetStr(atomic_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) {
if (!AttrUtils::SetStr(atomic_clean_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) { if (!AttrUtils::SetStr(atomic_clean_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) {
GELOGW("LinkToAtomicNode: SetStr failed"); GELOGW("LinkToAtomicNode: SetStr failed");
return INTERNAL_ERROR; return INTERNAL_ERROR;
@@ -258,7 +262,7 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) {
return true; return true;
} }
/// ///
/// @brief Clear Status, used for subgraph pass
/// @brief Clear Status, uesd for subgraph pass
/// @return SUCCESS /// @return SUCCESS
/// ///
Status AtomicAddrCleanPass::ClearStatus() { Status AtomicAddrCleanPass::ClearStatus() {


+ 0
- 1
src/ge/graph/passes/atomic_addr_clean_pass.h View File

@@ -75,7 +75,6 @@ class AtomicAddrCleanPass : public GraphPass {
bool IsAtomicOp(const NodePtr &node); bool IsAtomicOp(const NodePtr &node);


vector<NodePtr> hcom_node_vec_; vector<NodePtr> hcom_node_vec_;
bool is_loop_graph_ = false;
}; };
} // namespace ge } // namespace ge




+ 0
- 319
src/ge/graph/passes/attach_stream_label_pass.cc View File

@@ -1,319 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/attach_stream_label_pass.h"
#include "ge/ge_api_types.h"
#include "graph/common/omg_util.h"

namespace ge {
///
/// @brief Entry of the pass: attach stream labels to cond/loop branches of the graph.
/// @param [in] graph  compute graph to process
/// @return Status  SUCCESS / FAILED
///
Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) {
  GELOGD("AttachStreamLabelPass Enter.");

  // Collect StreamSwitch / StreamMerge / Enter nodes and mark branch head nodes.
  FindNodes(graph);
  for (const auto &node : need_label_nodes_) {
    OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    // Only label branches whose start node does not already carry a stream label.
    if (!op_desc->HasAttr(ATTR_NAME_STREAM_LABEL)) {
      GE_CHK_STATUS_RET(UpdateCondBranch(node), "Update cond branch failed, start node:%s.", node->GetName().c_str());
    }
  }
  // Propagate labels from Enter nodes through their loop bodies.
  GE_CHK_STATUS_RET(UpdateEnterNode(), "UpdateEnterNode failed.");

  GELOGD("AttachStreamLabelPass Leave.");
  return SUCCESS;
}

///
/// @brief Reset all per-graph bookkeeping so the pass can be reused on another subgraph.
/// @return SUCCESS always
///
Status AttachStreamLabelPass::ClearStatus() {
  // Drop every container populated while running the pass.
  branch_head_nodes_.clear();
  enter_nodes_.clear();
  need_label_nodes_.clear();
  stream_switch_nodes_.clear();
  return SUCCESS;
}

///
/// @brief Scan the graph for StreamSwitch / StreamMerge / Enter nodes and mark
///        the head nodes of every StreamSwitch branch.
/// @param [in] graph  graph to scan
/// @return void
///
void AttachStreamLabelPass::FindNodes(const ComputeGraphPtr &graph) {
  for (const NodePtr &cur_node : graph->GetDirectNode()) {
    const std::string &op_type = cur_node->GetType();
    if (op_type == STREAMSWITCH) {
      stream_switch_nodes_.emplace_back(cur_node);
      continue;
    }
    if (op_type == STREAMMERGE) {
      // StreamMerge nodes created for NextIteration keep their own labeling; skip them.
      const auto &op_desc = cur_node->GetOpDesc();
      if ((op_desc != nullptr) && !op_desc->HasAttr(ATTR_NAME_NEXT_ITERATION)) {
        need_label_nodes_.emplace_back(cur_node);
      }
      continue;
    }
    if ((op_type == ENTER) || (op_type == REFENTER)) {
      enter_nodes_.emplace_back(cur_node);
    }
  }

  // StreamSwitch nodes also need labels; first record each branch's head nodes.
  for (const auto &switch_node : stream_switch_nodes_) {
    for (const auto &ctrl_succ : switch_node->GetOutControlNodes()) {
      MarkHeadNodes(ctrl_succ, switch_node);
    }
    need_label_nodes_.emplace_back(switch_node);
  }
}

///
/// @brief Mark node as head_node of stream_switch
/// @param [in] node
/// @param [in] stream_switch
/// @return void
///
void AttachStreamLabelPass::MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch) {
static const std::set<std::string> bypass_type_set = {IDENTITY, IDENTITYN, CAST, TRANSDATA,
TRANSPOSE, TRANSPOSED, RESHAPE};
std::stack<NodePtr> nodes;
nodes.push(node);
std::set<NodePtr> visited;
while (!nodes.empty()) {
NodePtr cur_node = nodes.top();
nodes.pop();
if (visited.count(cur_node) > 0) {
continue;
}
GELOGD("branch_head_node %s of stream_switch %s.", cur_node->GetName().c_str(), stream_switch->GetName().c_str());
branch_head_nodes_[cur_node] = stream_switch;
if (bypass_type_set.count(cur_node->GetType()) > 0) {
for (const auto &out_node : cur_node->GetOutAllNodes()) {
nodes.push(out_node);
}
}
visited.insert(cur_node);
}
}

///
/// @brief Attach a stream label to every node of the cond branch starting at `node`.
///        Walks downstream with a DFS, stopping at branch-ending ops and at head
///        nodes that belong to a different StreamSwitch.
/// @param [in] node  branch start node (StreamSwitch or StreamMerge)
/// @return Status  SUCCESS / FAILED / INTERNAL_ERROR (via AttachFlag)
///
Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) {
  std::string stream_label;
  std::unordered_set<NodePtr> branch_nodes;
  std::unordered_set<NodePtr> visited;
  std::stack<NodePtr> nodes;
  nodes.push(node);

  // Node types that terminate the branch walk.
  static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE};
  bool merge_flag = false;
  bool exit_flag = false;
  bool net_output_flag = false;
  while (!nodes.empty()) {
    NodePtr cur_node = nodes.top();
    nodes.pop();
    if (visited.count(cur_node) > 0) {
      continue;
    }
    // Derive/refresh stream_label from cur_node and accumulate the merge/exit/netoutput flags.
    if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) {
      GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str());
      return FAILED;
    }

    const std::string &type = cur_node->GetType();
    for (const auto &out_node : cur_node->GetOutAllNodes()) {
      const std::string &out_type = out_node->GetType();
      // Stop at: branch-ending ops; head nodes owned by another StreamSwitch;
      // non-StreamActive successors of an Enter/RefEnter.
      bool stop_flag = (end_type_set.count(out_type) > 0) ||
                       ((branch_head_nodes_.count(out_node) > 0) && (branch_head_nodes_[out_node] != node)) ||
                       (((type == ENTER) || (type == REFENTER)) && (out_type != STREAMACTIVE));
      if (!stop_flag) {
        nodes.push(out_node);
        GELOGD("Insert branch node %s.", out_node->GetName().c_str());
        branch_nodes.insert(out_node);
      }
    }
    visited.insert(cur_node);
  }

  if (node->GetType() == STREAMSWITCH) {
    GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed.");
  }

  // If the branch both re-merges (or exits) and reaches NetOutput, labeling can stop here.
  bool attach_flag = (merge_flag || exit_flag) && net_output_flag;
  if (attach_flag) {
    GELOGI("No need to keep on attaching label.");
    return SUCCESS;
  }

  for (const NodePtr &tmp_node : branch_nodes) {
    GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str());
    GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed.");
  }

  return SUCCESS;
}

///
/// @brief Set the stream label of one node by its type and accumulate branch flags.
/// @param [in] node  node being visited
/// @param [out] stream_label     label derived from StreamSwitch/StreamMerge nodes
/// @param [out] merge_flag       set true when a StreamMerge is seen
/// @param [out] exit_flag        set true when an Exit/RefExit is seen
/// @param [out] net_output_flag  set true when a NetOutput is seen
/// @return Status  SUCCESS / FAILED / INTERNAL_ERROR
///
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag,
                                         bool &exit_flag, bool &net_output_flag) {
  const std::string &type = node->GetType();
  if (type == STREAMSWITCH) {
    if (node->GetInDataNodes().empty()) {
      GELOGE(INTERNAL_ERROR, "node %s has no input_data_node.", node->GetName().c_str());
      return INTERNAL_ERROR;
    }
    // Label is named after the switch's first data input, suffixed with the branch direction.
    stream_label = node->GetInDataNodes().at(0)->GetName();
    GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
    bool value = false;
    OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    GE_CHK_BOOL_EXEC(AttrUtils::GetBool(op_desc, ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG, value), return FAILED,
                     "StreamSwitch get attr TRUE_BRANCH_STREAM failed.");
    // "_t" marks the true branch, "_f" the false branch.
    stream_label += (value ? "_t" : "_f");
  } else if (type == STREAMMERGE) {
    stream_label = node->GetName();
    GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
    merge_flag = true;
  } else if ((type == EXIT) || (type == REFEXIT)) {
    // Exit keeps whatever label the walk has accumulated so far.
    GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
    exit_flag = true;
  } else if (type == NETOUTPUT) {
    net_output_flag = true;
  }

  return SUCCESS;
}

///
/// @brief Update stream labels starting from Enter nodes: group Enter nodes by their
///        StreamActive successor, label each group, then propagate the active label
///        through the corresponding loop body.
/// @return Status  SUCCESS / FAILED / INTERNAL_ERROR
///
Status AttachStreamLabelPass::UpdateEnterNode() {
  // Map each StreamActive node to the Enter nodes that control-connect into it.
  std::unordered_map<NodePtr, std::vector<NodePtr>> enter_active_map;
  for (const auto &enter_node : enter_nodes_) {
    for (const auto &out_ctrl_node : enter_node->GetOutControlNodes()) {
      if (out_ctrl_node->GetType() != STREAMACTIVE) {
        continue;
      }
      auto iter = enter_active_map.find(out_ctrl_node);
      if (iter == enter_active_map.end()) {
        enter_active_map[out_ctrl_node] = {enter_node};
      } else {
        iter->second.emplace_back(enter_node);
      }
    }
  }

  for (const auto &pair : enter_active_map) {
    // Give the Enter group and its StreamActive a consistent label.
    if (SetEnterLabel(pair.second, pair.first) != SUCCESS) {
      GELOGE(FAILED, "Set stream_label for enter_nodes failed.");
      return FAILED;
    }

    NodePtr active_node = pair.first;
    GE_CHECK_NOTNULL(active_node);
    // The StreamActive must carry exactly one non-empty active label.
    std::vector<std::string> active_label_list;
    if (!AttrUtils::GetListStr(active_node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, active_label_list) ||
        (active_label_list.size() != 1) || active_label_list[0].empty()) {
      GELOGE(INTERNAL_ERROR, "Get attr ATTR_NAME_ACTIVE_LABEL_LIST failed, node: %s.", active_node->GetName().c_str());
      return INTERNAL_ERROR;
    }

    // Propagate the active label through the loop body reached from these Enter nodes.
    std::stack<NodePtr> enter_nodes;
    for (const auto &enter_node : pair.second) {
      enter_nodes.emplace(enter_node);
    }
    if (UpdateLoopBranch(enter_nodes, active_label_list[0]) != SUCCESS) {
      GELOGE(FAILED, "Update stream_label for loop_branch failed.");
      return FAILED;
    }
  }

  return SUCCESS;
}

///
/// @brief Give a group of Enter nodes and their StreamActive node one consistent stream label.
/// @param [in] enter_nodes  Enter nodes feeding the StreamActive
/// @param [in] active_node  the StreamActive node
/// @return Status  SUCCESS / FAILED
///
Status AttachStreamLabelPass::SetEnterLabel(const std::vector<NodePtr> &enter_nodes, const NodePtr &active_node) {
  GE_CHECK_NOTNULL(active_node);
  std::string stream_label;
  (void)AttrUtils::GetStr(active_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label);

  // Check whether every labeled Enter node already agrees with the active node's label.
  bool labels_consistent = true;
  for (const auto &enter_node : enter_nodes) {
    std::string node_label;
    (void)AttrUtils::GetStr(enter_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, node_label);
    if (!node_label.empty() && (node_label != stream_label)) {
      labels_consistent = false;
      break;
    }
  }

  if (stream_label.empty()) {
    if (!labels_consistent) {
      // Some Enter node carries a label the active node lacks; leave things untouched.
      GELOGW("stream_label of enter_active is empty while stream_label of some enter_node is not.");
      return SUCCESS;
    }
    // No label anywhere yet: derive one from the active node's name.
    stream_label = active_node->GetName();
  }

  for (const auto &enter_node : enter_nodes) {
    GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed.");
  }
  GE_CHK_STATUS_RET(SetStreamLabel(active_node, stream_label), "Set stream label failed.");
  return SUCCESS;
}

///
/// @brief Propagate a stream label through the loop body reached from the given Enter nodes.
/// @param [in] enter_nodes   seed nodes of the walk
/// @param [in] stream_label  label to attach
/// @return Status  SUCCESS / FAILED
///
Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes,
                                               const std::string &stream_label) {
  std::stack<NodePtr> walk_stack(enter_nodes);
  while (!walk_stack.empty()) {
    const NodePtr cur_node = walk_stack.top();
    walk_stack.pop();
    for (const NodePtr &out_node : cur_node->GetOutAllNodes()) {
      OpDescPtr out_desc = out_node->GetOpDesc();
      GE_CHECK_NOTNULL(out_desc);
      const std::string out_type = out_desc->GetType();
      // Stop at nodes that already carry a label and at Enter/RefEnter nodes.
      bool skip_node = out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER);
      if (skip_node) {
        continue;
      }
      GELOGD("Attach label %s to node: %s.", stream_label.c_str(), out_node->GetName().c_str());
      GE_CHK_STATUS_RET(SetStreamLabel(out_node, stream_label), "Set stream label failed.");
      walk_stack.push(out_node);
    }
  }
  return SUCCESS;
}
} // namespace ge

+ 0
- 97
src/ge/graph/passes/attach_stream_label_pass.h View File

@@ -1,97 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_PASSES_ATTACH_STREAM_LABEL_PASS_H_
#define GE_GRAPH_PASSES_ATTACH_STREAM_LABEL_PASS_H_

#include <stack>
#include "inc/graph_pass.h"

namespace ge {
class AttachStreamLabelPass : public GraphPass {
 public:
  ///
  /// @brief Entry of the pass: attach stream labels to cond/loop branches.
  /// @param [in] graph  compute graph to process
  /// @return Status
  ///
  // `override` added: ClearStatus() below already overrides, and Run is the
  // GraphPass virtual entry point; marking it lets the compiler check the signature.
  Status Run(ComputeGraphPtr graph) override;

  ///
  /// @brief Clear Status, used for subgraph pass
  /// @return
  ///
  Status ClearStatus() override;

 private:
  ///
  /// @brief Find StreamSwitch / StreamMerge / Enter node
  /// @param [in] graph
  /// @return void
  ///
  void FindNodes(const ComputeGraphPtr &graph);

  ///
  /// @brief Mark node as head_node of stream_switch
  /// @param [in] node
  /// @param [in] stream_switch
  /// @return void
  ///
  void MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch);

  ///
  /// @brief update cond branch
  /// @param [in] node
  /// @return Status
  ///
  Status UpdateCondBranch(const NodePtr &node);

  ///
  /// @brief attach flag
  /// @param [in] node
  /// @param [out] stream_label
  /// @param [out] merge_flag
  /// @param [out] exit_flag
  /// @param [out] net_output_flag
  /// @return Status
  ///
  static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag,
                           bool &net_output_flag);

  ///
  /// @brief Update stream_label for loop_branch
  /// @param [in] enter_nodes
  /// @param [in] stream_label
  /// @return Status
  ///
  static Status UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes, const std::string &stream_label);

  ///
  /// @brief Update stream_label start with enter nodes
  /// @return Status
  ///
  Status UpdateEnterNode();

  ///
  /// @brief Set stream_label for enter_nodes
  /// @param [in] enter_nodes
  /// @param [in] active_node
  /// @return Status
  ///
  static Status SetEnterLabel(const std::vector<NodePtr> &enter_nodes, const NodePtr &active_node);

  std::vector<NodePtr> stream_switch_nodes_;  // StreamSwitch nodes found by FindNodes
  std::vector<NodePtr> need_label_nodes_;     // branch start nodes that need a stream label
  std::vector<NodePtr> enter_nodes_;          // Enter / RefEnter nodes of the graph
  // Maps each branch head node to the StreamSwitch that owns the branch.
  std::unordered_map<NodePtr, NodePtr> branch_head_nodes_;
};
} // namespace ge
#endif // GE_GRAPH_PASSES_ATTACH_STREAM_LABEL_PASS_H_

+ 1
- 0
src/ge/graph/passes/cast_remove_pass.cc View File

@@ -69,6 +69,7 @@ bool CastRemovePass::HasSameDataType(OpDescPtr &begin_op_desc, OpDescPtr &end_op


auto begin_out_desc = begin_op_desc->MutableOutputDesc(0); auto begin_out_desc = begin_op_desc->MutableOutputDesc(0);
DataType begin_out_datatype = begin_out_desc->GetDataType(); DataType begin_out_datatype = begin_out_desc->GetDataType();

if (begin_out_datatype == end_out_datatype && (begin_out_datatype == DT_FLOAT16 || begin_out_datatype == DT_FLOAT)) { if (begin_out_datatype == end_out_datatype && (begin_out_datatype == DT_FLOAT16 || begin_out_datatype == DT_FLOAT)) {
type = begin_out_datatype; type = begin_out_datatype;
return true; return true;


+ 0
- 1
src/ge/graph/passes/common_subexpression_elimination_pass.cc View File

@@ -83,7 +83,6 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) {
continue; continue;
} }
auto key = GetCseKey(node); auto key = GetCseKey(node);
GELOGD("The node %s cse key %s", node->GetName().c_str(), key.c_str());
auto iter = keys_to_node.find(key); auto iter = keys_to_node.find(key);
if (iter == keys_to_node.end()) { if (iter == keys_to_node.end()) {
keys_to_node[key] = node; keys_to_node[key] = node;


+ 2
- 2
src/ge/graph/passes/compile_nodes_pass.cc View File

@@ -23,7 +23,6 @@
#include "common/ge_inner_error_codes.h" #include "common/ge_inner_error_codes.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/op_desc.h" #include "graph/op_desc.h"


using domi::ImplyType; using domi::ImplyType;
@@ -79,7 +78,7 @@ graphStatus CompileNodesPass::Run(ComputeGraphPtr graph) {
return result; return result;
} }
GELOGI("[CompileNodesPass]: Optimize success."); GELOGI("[CompileNodesPass]: Optimize success.");
GE_TIMESTAMP_EVENT_END(CompileNodesPass, "OptimizeStage2::ControlAttrOptimize::CompileNodesPass");
GE_TIMESTAMP_END(CompileNodesPass, "GraphManager::CompileNodesPass");
return GRAPH_SUCCESS; return GRAPH_SUCCESS;
} }


@@ -102,6 +101,7 @@ graphStatus CompileNodesPass::GetSupportedKernel(const NodePtr &node, const std:
} }
} }
OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name); OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name);

if (kernel_info == nullptr) { if (kernel_info == nullptr) {
GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node->GetName().c_str()); GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node->GetName().c_str());
return ge::GE_GRAPH_PARAM_NULLPTR; return ge::GE_GRAPH_PARAM_NULLPTR;


+ 2
- 2
src/ge/graph/passes/cond_pass.cc View File

@@ -226,7 +226,7 @@ Status CondPass::HandleScalarCond(const ComputeGraphPtr &graph, const OutDataAnc
return FAILED; return FAILED;
} }


if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, cast_node) != GRAPH_SUCCESS) {
if (GraphUtils::InsertNodeBefore(out_anchor, {in_anchor}, cast_node) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Insert Cast node %s between %s->%s failed.", cast_node->GetName().c_str(), GELOGE(FAILED, "Insert Cast node %s between %s->%s failed.", cast_node->GetName().c_str(),
out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
return FAILED; return FAILED;
@@ -271,7 +271,7 @@ Status CondPass::InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr
} }
AddRePassNode(new_node); AddRePassNode(new_node);


if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, new_node) != GRAPH_SUCCESS) {
if (GraphUtils::InsertNodeBefore(out_anchor, {in_anchor}, new_node) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Insert %s node %s between %s->%s failed.", type.c_str(), new_node->GetName().c_str(), GELOGE(FAILED, "Insert %s node %s between %s->%s failed.", type.c_str(), new_node->GetName().c_str(),
out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
return FAILED; return FAILED;


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save