| @@ -6,17 +6,17 @@ | |||
| "net_name": "ResNet50", | |||
| "mode": 0, | |||
| "iteration": 0, | |||
| "kernels": ["TensorAdd"] | |||
| "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] | |||
| }, | |||
| "DumpSettingsSpec": { | |||
| "enable": "true: dump enable false: dump disable", | |||
| "trans_flag": "true: trans to host format,false: not trans format", | |||
| "enable": "true: dump enable, false: dump disable", | |||
| "trans_flag": "true: transform to host format, false: do not transform format", | |||
| "path": "the dump file folder", | |||
| "net_name": "net name, e.g. ResNet50", | |||
| "mode": "0: dump all kernels 1: dump kernels in kernels list", | |||
| "iteration": "0: all iteration others: specified iteration ", | |||
| "kernels": "kernel name list need to be dump" | |||
| "mode": "0: dump all kernels, 1: dump kernels in kernels list", | |||
| "iteration": "0: all iterations, others: the specified iteration", | |||
| "kernels": "full scope names of the ops that need to be dumped" | |||
| }, | |||
| "other": {} | |||
| } | |||
| @@ -6,17 +6,17 @@ | |||
| "net_name": "ResNet50", | |||
| "mode": 0, | |||
| "iteration": 0, | |||
| "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"] | |||
| "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] | |||
| }, | |||
| "DumpSettingsSpec": { | |||
| "enable": "true: dump enable false: dump disable", | |||
| "trans_flag": "true: trans to host format,false: not trans format", | |||
| "enable": "true: dump enable, false: dump disable", | |||
| "trans_flag": "true: transform to host format, false: do not transform format", | |||
| "path": "the dump file folder", | |||
| "net_name": "net name, e.g. ResNet50", | |||
| "mode": "0: dump all kernels 1: dump kernels in kernels list", | |||
| "iteration": "0: all iteration others: specified iteration ", | |||
| "kernels": "kernel name list need to be dump" | |||
| "mode": "0: dump all kernels, 1: dump kernels in kernels list", | |||
| "iteration": "0: all iterations, others: the specified iteration", | |||
| "kernels": "full scope names of the ops that need to be dumped" | |||
| }, | |||
| "other": {} | |||
| } | |||
| } | |||
| @@ -6,17 +6,17 @@ | |||
| "net_name": "ResNet50", | |||
| "mode": 0, | |||
| "iteration": 0, | |||
| "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"] | |||
| "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] | |||
| }, | |||
| "DumpSettingsSpec": { | |||
| "enable": "true: dump enable false: dump disable", | |||
| "trans_flag": "true: trans to host format,false: not trans format", | |||
| "enable": "true: dump enable, false: dump disable", | |||
| "trans_flag": "true: transform to host format, false: do not transform format", | |||
| "path": "the dump file folder", | |||
| "net_name": "net name, e.g. ResNet50", | |||
| "mode": "0: dump all kernels 1: dump kernels in kernels list", | |||
| "iteration": "0: all iteration others: specified iteration ", | |||
| "kernels": "kernel name list need to be dump" | |||
| "mode": "0: dump all kernels, 1: dump kernels in kernels list", | |||
| "iteration": "0: all iterations, others: the specified iteration", | |||
| "kernels": "full scope names of the ops that need to be dumped" | |||
| }, | |||
| "other": {} | |||
| } | |||
| } | |||
| @@ -53,6 +53,7 @@ enum DataTypeTransMode { | |||
| FROM_INT8_TO_FLOAT, | |||
| FROM_INT8_TO_INT32, | |||
| FROM_INT64_TO_INT32, | |||
| FROM_UINT16_TO_INT32, | |||
| }; | |||
| const std::map<std::pair<TypeId, TypeId>, DataTypeTransMode> mode_map{ | |||
| @@ -68,7 +69,8 @@ const std::map<std::pair<TypeId, TypeId>, DataTypeTransMode> mode_map{ | |||
| {std::pair<TypeId, TypeId>(kNumberTypeUInt8, kNumberTypeInt32), FROM_UINT8_TO_INT32}, | |||
| {std::pair<TypeId, TypeId>(kNumberTypeInt8, kNumberTypeFloat32), FROM_INT8_TO_FLOAT}, | |||
| {std::pair<TypeId, TypeId>(kNumberTypeInt8, kNumberTypeInt32), FROM_INT8_TO_INT32}, | |||
| {std::pair<TypeId, TypeId>(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32}}; | |||
| {std::pair<TypeId, TypeId>(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32}, | |||
| {std::pair<TypeId, TypeId>(kNumberTypeUInt16, kNumberTypeInt32), FROM_UINT16_TO_INT32}}; | |||
| template <typename SrcT, typename DstT> | |||
| void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) { | |||
| @@ -116,6 +118,9 @@ bool CastKernel(const TypeIdArgs &args, void *dst, const size_t data_size, const | |||
| case FROM_INT64_TO_INT32: | |||
| TransDataSrc2Dst<int64_t, int32_t>(args, dst, data_size); | |||
| break; | |||
| case FROM_UINT16_TO_INT32: | |||
| TransDataSrc2Dst<uint16_t, int32_t>(args, dst, data_size); | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "unsupported datatype trans"; | |||
| return false; | |||
| @@ -106,13 +106,13 @@ bool AscendDeviceAddress::SyncDeviceToHost(const std::vector<int> &shape, size_t | |||
| } else { | |||
| auto shape_size = trans::ShapeSize(host_shape); | |||
| auto host = std::vector<uint8_t>(size_); | |||
| const trans::TypeIdArgs type_args{ptr_, shape_size, type_id_, type}; | |||
| sync_ok = trans::TransDataType(type_args, host.data()); | |||
| SyncMemory(host.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
| const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type}; | |||
| sync_ok = trans::TransDataType(type_args, host_ptr); | |||
| if (!sync_ok) { | |||
| MS_LOG(ERROR) << "trans data type failed."; | |||
| return false; | |||
| } | |||
| SyncMemory(host_ptr, host.data(), size, RT_MEMCPY_DEVICE_TO_HOST); | |||
| } | |||
| } else if (format_ == kOpFormat_NC1HWC0 || format_ == kOpFormat_FRAC_Z || format_ == kOpFormat_FRAC_NZ) { | |||
| sync_ok = SyncDeviceToHostAndConvertFormat(shape, size, type, host_ptr); | |||
| @@ -150,9 +150,9 @@ void DumpOutput(mindspore::session::KernelGraph *graph, const string &dump_path, | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(node); | |||
| for (size_t j = 0; j < output_size; ++j) { | |||
| auto addr = AnfAlgo::GetOutputAddr(node, j); | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(node, j); | |||
| auto type = AnfAlgo::GetOutputDeviceDataType(node, j); | |||
| auto format = AnfAlgo::GetOutputFormat(node, j); | |||
| auto shape = AnfAlgo::GetOutputInferShape(node, j); | |||
| auto type = AnfAlgo::GetOutputInferDataType(node, j); | |||
| auto format = kOpFormat_DEFAULT; | |||
| string filepath = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j); | |||
| auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr); | |||
| std::vector<int> int_shapes; | |||
| @@ -181,9 +181,9 @@ void DumpParameters(mindspore::session::KernelGraph *graph, const string &dump_p | |||
| continue; | |||
| } | |||
| auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX); | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX); | |||
| auto type = AnfAlgo::GetOutputDeviceDataType(item, PRAMATER_OUTPUT_INDEX); | |||
| auto format = AnfAlgo::GetOutputFormat(item, PRAMATER_OUTPUT_INDEX); | |||
| auto shape = AnfAlgo::GetOutputInferShape(item, PRAMATER_OUTPUT_INDEX); | |||
| auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX); | |||
| auto format = kOpFormat_DEFAULT; | |||
| string filepath = dump_path + '/' + parameter_name + '_' + "output_0"; | |||
| auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr); | |||
| std::vector<int> int_shapes; | |||