@@ -122,9 +122,8 @@ Status ModelHelper::SaveModelTbeKernel(std::shared_ptr<OmFileSaveHelper> &om_fil | |||||
if (tbe_kernel_store.DataSize() > 0) { | if (tbe_kernel_store.DataSize() > 0) { | ||||
GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, | SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, | ||||
ge_model->GetTBEKernelStore().Data(), | |||||
ge_model->GetTBEKernelStore().DataSize(), model_index), | |||||
"Add tbe kernel partition failed"); | |||||
ge_model->GetTBEKernelStore().Data(), ge_model->GetTBEKernelStore().DataSize(), | |||||
model_index), "Add tbe kernel partition failed"); | |||||
} | } | ||||
// no need to check value, DATA->NetOutput | // no need to check value, DATA->NetOutput | ||||
(void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); | (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); | ||||
@@ -21,7 +21,7 @@ | |||||
namespace { | namespace { | ||||
const uint32_t kRangeCeilInterval = 2; | const uint32_t kRangeCeilInterval = 2; | ||||
const uint32_t kLogBase = 2; | const uint32_t kLogBase = 2; | ||||
const int64_t kLargeBlockSize = 8 * 1024 * 1024; // 8M | |||||
const int64_t kLargeBlockSize = 8388608; // 8 * 1024 * 1024 | |||||
const int64_t kLargeBlockRangeSize = 2; | const int64_t kLargeBlockRangeSize = 2; | ||||
} // namespace | } // namespace | ||||
@@ -83,7 +83,7 @@ const uint32_t kAddrLen = sizeof(void *); | |||||
const int kDecimal = 10; | const int kDecimal = 10; | ||||
const int kBytes = 8; | const int kBytes = 8; | ||||
const uint32_t kDataMemAlignSizeCompare = 64; | const uint32_t kDataMemAlignSizeCompare = 64; | ||||
const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; // 2M | |||||
const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024 | |||||
const uint32_t kDumpFlagOfL1Fusion = 0; | const uint32_t kDumpFlagOfL1Fusion = 0; | ||||
const char *const kDefaultBatchLable = "Batch_default"; | const char *const kDefaultBatchLable = "Batch_default"; | ||||
const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; | const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; | ||||
@@ -28,8 +28,8 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { | |||||
rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), (void *)args, sizeof(args), | |||||
RT_MEMCPY_HOST_TO_DEVICE); | |||||
rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), reinterpret_cast<void *>(args), | |||||
sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | ||||
@@ -533,9 +533,8 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, | |||||
Graph2SubGraphInfoList &sub_graph_map, | |||||
uint64_t session_id) { | |||||
Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, | |||||
Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) { | |||||
GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
// use default 16 multi thread | // use default 16 multi thread | ||||
const uint32_t thread_num = 16; | const uint32_t thread_num = 16; | ||||
@@ -549,12 +548,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
if (!op_compile_strategy.empty()) { | if (!op_compile_strategy.empty()) { | ||||
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | ||||
} | } | ||||
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, | |||||
this, | |||||
compute_graph->GetGraphID(), | |||||
subgraph, | |||||
compute_graph, | |||||
session_id, | |||||
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | |||||
compute_graph->GetGraphID(), subgraph, compute_graph, session_id, | |||||
GetThreadLocalContext()); | GetThreadLocalContext()); | ||||
if (!f.valid()) { | if (!f.valid()) { | ||||
GELOGE(FAILED, "Future is invalid"); | GELOGE(FAILED, "Future is invalid"); | ||||
@@ -562,7 +557,6 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
} | } | ||||
vector_future.emplace_back(std::move(f)); | vector_future.emplace_back(std::move(f)); | ||||
} | } | ||||
for (auto &function_graph : compute_graph->GetAllSubgraphs()) { | for (auto &function_graph : compute_graph->GetAllSubgraphs()) { | ||||
auto subgraph_list = sub_graph_map[function_graph]; | auto subgraph_list = sub_graph_map[function_graph]; | ||||
for (const auto &subgraph : subgraph_list) { | for (const auto &subgraph : subgraph_list) { | ||||
@@ -32,7 +32,8 @@ Debug::~Debug() = default; | |||||
void Debug::DumpProto(const Message &proto, const char *file) { | void Debug::DumpProto(const Message &proto, const char *file) { | ||||
std::string file_path = RealPath(file); | std::string file_path = RealPath(file); | ||||
int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | M_UMASK_OTHREAD); | |||||
int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | | |||||
M_UMASK_OTHREAD); | |||||
if (fd == -1) { | if (fd == -1) { | ||||
GELOGW("Write %s failed", file_path.c_str()); | GELOGW("Write %s failed", file_path.c_str()); | ||||
return; | return; | ||||
@@ -93,7 +93,8 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { | |||||
in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { | in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { | ||||
auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); | auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); | ||||
if (peer_in_node->GetType() == DATA) { | if (peer_in_node->GetType() == DATA) { | ||||
GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), op_info.engine.c_str()); | |||||
GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), | |||||
op_info.engine.c_str()); | |||||
return false; | return false; | ||||
} | } | ||||
} | } | ||||
@@ -100,8 +100,8 @@ class SsdPriorboxKernel : public Kernel { | |||||
* @return OTHERS: Execution failed | * @return OTHERS: Execution failed | ||||
* @author | * @author | ||||
*/ | */ | ||||
Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, int layer_width, | |||||
int layer_height, int &num_priors, int &dim_size) const; | |||||
Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, | |||||
int layer_width, int layer_height, int &num_priors, int &dim_size) const; | |||||
void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector<float> &result); | void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector<float> &result); | ||||
std::unique_ptr<float[]> BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width, | std::unique_ptr<float[]> BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width, | ||||
float step_height, int img_width, int img_height, float offset, | float step_height, int img_width, int img_height, float offset, | ||||
@@ -57,7 +57,8 @@ struct GraphExecutionContext { | |||||
do { \ | do { \ | ||||
if ((context != nullptr) && (context)->profiler != nullptr) { \ | if ((context != nullptr) && (context)->profiler != nullptr) { \ | ||||
if (node_name != nullptr) { \ | if (node_name != nullptr) { \ | ||||
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, ##__VA_ARGS__);\ | |||||
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, \ | |||||
##__VA_ARGS__); \ | |||||
} else { \ | } else { \ | ||||
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ | context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ | ||||
}\ | }\ | ||||
@@ -384,8 +384,8 @@ Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs, | |||||
i, outputs[i].length, output_real_size); | i, outputs[i].length, output_real_size); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, | |||||
args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); | |||||
GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, | |||||
RT_MEMCPY_DEVICE_TO_DEVICE)); | |||||
} | } | ||||
outputs[i].length = output_real_size; | outputs[i].length = output_real_size; | ||||
} | } | ||||
@@ -176,7 +176,7 @@ Status OpsKernelManager::ParsePluginOptions(const map<string, string> &options, | |||||
enable_flag = true; | enable_flag = true; | ||||
} else { | } else { | ||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | ||||
plugin_name.c_str(), iter->second.c_str()); | |||||
plugin_name.c_str(), iter->second.c_str()); | |||||
return GE_GRAPH_OPTIONS_INVALID; | return GE_GRAPH_OPTIONS_INVALID; | ||||
} | } | ||||
} catch (std::invalid_argument &) { | } catch (std::invalid_argument &) { | ||||
@@ -24,9 +24,9 @@ | |||||
#include "graph/ge_error_codes.h" | #include "graph/ge_error_codes.h" | ||||
namespace { | namespace { | ||||
#define IR_MAJOR_VERSION (int(1)) | |||||
#define IR_MINOR_VERSION (int(0)) | |||||
#define IR_PATCH_VERSION (int(0)) | |||||
const int IR_MAJOR_VERSION = 1; | |||||
const int IR_MINOR_VERSION = 0; | |||||
const int IR_PATCH_VERSION = 0; | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||