| @@ -122,9 +122,8 @@ Status ModelHelper::SaveModelTbeKernel(std::shared_ptr<OmFileSaveHelper> &om_fil | |||||
| if (tbe_kernel_store.DataSize() > 0) { | if (tbe_kernel_store.DataSize() > 0) { | ||||
| GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
| SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, | SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, | ||||
| ge_model->GetTBEKernelStore().Data(), | |||||
| ge_model->GetTBEKernelStore().DataSize(), model_index), | |||||
| "Add tbe kernel partition failed"); | |||||
| ge_model->GetTBEKernelStore().Data(), ge_model->GetTBEKernelStore().DataSize(), | |||||
| model_index), "Add tbe kernel partition failed"); | |||||
| } | } | ||||
| // no need to check value, DATA->NetOutput | // no need to check value, DATA->NetOutput | ||||
| (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); | (void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); | ||||
| @@ -21,7 +21,7 @@ | |||||
| namespace { | namespace { | ||||
| const uint32_t kRangeCeilInterval = 2; | const uint32_t kRangeCeilInterval = 2; | ||||
| const uint32_t kLogBase = 2; | const uint32_t kLogBase = 2; | ||||
| const int64_t kLargeBlockSize = 8 * 1024 * 1024; // 8M | |||||
| const int64_t kLargeBlockSize = 8388608; // 8 * 1024 * 1024 | |||||
| const int64_t kLargeBlockRangeSize = 2; | const int64_t kLargeBlockRangeSize = 2; | ||||
| } // namespace | } // namespace | ||||
| @@ -83,7 +83,7 @@ const uint32_t kAddrLen = sizeof(void *); | |||||
| const int kDecimal = 10; | const int kDecimal = 10; | ||||
| const int kBytes = 8; | const int kBytes = 8; | ||||
| const uint32_t kDataMemAlignSizeCompare = 64; | const uint32_t kDataMemAlignSizeCompare = 64; | ||||
| const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024; // 2M | |||||
| const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024 | |||||
| const uint32_t kDumpFlagOfL1Fusion = 0; | const uint32_t kDumpFlagOfL1Fusion = 0; | ||||
| const char *const kDefaultBatchLable = "Batch_default"; | const char *const kDefaultBatchLable = "Batch_default"; | ||||
| const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; | const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; | ||||
| @@ -28,8 +28,8 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { | |||||
| rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); | ||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
| rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), (void *)args, sizeof(args), | |||||
| RT_MEMCPY_HOST_TO_DEVICE); | |||||
| rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), reinterpret_cast<void *>(args), | |||||
| sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); | ||||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | return RT_ERROR_TO_GE_STATUS(rt_ret);) | ||||
| rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | ||||
| @@ -533,9 +533,8 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, | |||||
| Graph2SubGraphInfoList &sub_graph_map, | |||||
| uint64_t session_id) { | |||||
| Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, | |||||
| Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) { | |||||
| GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
| // use default 16 multi thread | // use default 16 multi thread | ||||
| const uint32_t thread_num = 16; | const uint32_t thread_num = 16; | ||||
| @@ -549,12 +548,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
| if (!op_compile_strategy.empty()) { | if (!op_compile_strategy.empty()) { | ||||
| (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | ||||
| } | } | ||||
| std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, | |||||
| this, | |||||
| compute_graph->GetGraphID(), | |||||
| subgraph, | |||||
| compute_graph, | |||||
| session_id, | |||||
| std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | |||||
| compute_graph->GetGraphID(), subgraph, compute_graph, session_id, | |||||
| GetThreadLocalContext()); | GetThreadLocalContext()); | ||||
| if (!f.valid()) { | if (!f.valid()) { | ||||
| GELOGE(FAILED, "Future is invalid"); | GELOGE(FAILED, "Future is invalid"); | ||||
| @@ -562,7 +557,6 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
| } | } | ||||
| vector_future.emplace_back(std::move(f)); | vector_future.emplace_back(std::move(f)); | ||||
| } | } | ||||
| for (auto &function_graph : compute_graph->GetAllSubgraphs()) { | for (auto &function_graph : compute_graph->GetAllSubgraphs()) { | ||||
| auto subgraph_list = sub_graph_map[function_graph]; | auto subgraph_list = sub_graph_map[function_graph]; | ||||
| for (const auto &subgraph : subgraph_list) { | for (const auto &subgraph : subgraph_list) { | ||||
| @@ -32,7 +32,8 @@ Debug::~Debug() = default; | |||||
| void Debug::DumpProto(const Message &proto, const char *file) { | void Debug::DumpProto(const Message &proto, const char *file) { | ||||
| std::string file_path = RealPath(file); | std::string file_path = RealPath(file); | ||||
| int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | M_UMASK_OTHREAD); | |||||
| int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | | |||||
| M_UMASK_OTHREAD); | |||||
| if (fd == -1) { | if (fd == -1) { | ||||
| GELOGW("Write %s failed", file_path.c_str()); | GELOGW("Write %s failed", file_path.c_str()); | ||||
| return; | return; | ||||
| @@ -93,7 +93,8 @@ bool AtomicAddrCleanPass::CheckAtomicFromOpsKernel(const NodePtr &node) { | |||||
| in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { | in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { | ||||
| auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); | auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); | ||||
| if (peer_in_node->GetType() == DATA) { | if (peer_in_node->GetType() == DATA) { | ||||
| GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), op_info.engine.c_str()); | |||||
| GELOGI("Recognized atomic op %s from %s engine and input is DATA.", node->GetName().c_str(), | |||||
| op_info.engine.c_str()); | |||||
| return false; | return false; | ||||
| } | } | ||||
| } | } | ||||
| @@ -100,8 +100,8 @@ class SsdPriorboxKernel : public Kernel { | |||||
| * @return OTHERS: Execution failed | * @return OTHERS: Execution failed | ||||
| * @author | * @author | ||||
| */ | */ | ||||
| Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, int layer_width, | |||||
| int layer_height, int &num_priors, int &dim_size) const; | |||||
| Status GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uint32_t min_sizes_size, uint32_t max_sizes_size, | |||||
| int layer_width, int layer_height, int &num_priors, int &dim_size) const; | |||||
| void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector<float> &result); | void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector<float> &result); | ||||
| std::unique_ptr<float[]> BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width, | std::unique_ptr<float[]> BoundaryCalulate(int dim_size, int layer_width, int layer_height, float step_width, | ||||
| float step_height, int img_width, int img_height, float offset, | float step_height, int img_width, int img_height, float offset, | ||||
| @@ -57,7 +57,8 @@ struct GraphExecutionContext { | |||||
| do { \ | do { \ | ||||
| if ((context != nullptr) && (context)->profiler != nullptr) { \ | if ((context != nullptr) && (context)->profiler != nullptr) { \ | ||||
| if (node_name != nullptr) { \ | if (node_name != nullptr) { \ | ||||
| context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, ##__VA_ARGS__);\ | |||||
| context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, \ | |||||
| ##__VA_ARGS__); \ | |||||
| } else { \ | } else { \ | ||||
| context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ | context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ | ||||
| }\ | }\ | ||||
| @@ -384,8 +384,8 @@ Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs, | |||||
| i, outputs[i].length, output_real_size); | i, outputs[i].length, output_real_size); | ||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, | |||||
| args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE)); | |||||
| GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, | |||||
| RT_MEMCPY_DEVICE_TO_DEVICE)); | |||||
| } | } | ||||
| outputs[i].length = output_real_size; | outputs[i].length = output_real_size; | ||||
| } | } | ||||
| @@ -176,7 +176,7 @@ Status OpsKernelManager::ParsePluginOptions(const map<string, string> &options, | |||||
| enable_flag = true; | enable_flag = true; | ||||
| } else { | } else { | ||||
| GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | ||||
| plugin_name.c_str(), iter->second.c_str()); | |||||
| plugin_name.c_str(), iter->second.c_str()); | |||||
| return GE_GRAPH_OPTIONS_INVALID; | return GE_GRAPH_OPTIONS_INVALID; | ||||
| } | } | ||||
| } catch (std::invalid_argument &) { | } catch (std::invalid_argument &) { | ||||
| @@ -24,9 +24,9 @@ | |||||
| #include "graph/ge_error_codes.h" | #include "graph/ge_error_codes.h" | ||||
| namespace { | namespace { | ||||
| #define IR_MAJOR_VERSION (int(1)) | |||||
| #define IR_MINOR_VERSION (int(0)) | |||||
| #define IR_PATCH_VERSION (int(0)) | |||||
| const int IR_MAJOR_VERSION = 1; | |||||
| const int IR_MINOR_VERSION = 0; | |||||
| const int IR_PATCH_VERSION = 0; | |||||
| } // namespace | } // namespace | ||||
| namespace ge { | namespace ge { | ||||