|
|
|
@@ -1,5 +1,5 @@ |
|
|
|
/** |
|
|
|
* Copyright 2020 Huawei Technologies Co., Ltd |
|
|
|
* Copyright 2020-2021 Huawei Technologies Co., Ltd |
|
|
|
* |
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
* you may not use this file except in compliance with the License. |
|
|
|
@@ -72,6 +72,7 @@ class BatchNormFold2GradGpuKernel : public GpuKernel { |
|
|
|
cudaMemcpyAsync(current_step_host, global_step, sizeof(int32_t), cudaMemcpyDeviceToHost, |
|
|
|
reinterpret_cast<cudaStream_t>(stream_ptr)), |
|
|
|
"Failed to copy gpu memory."); |
|
|
|
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaDeviceSynchronize(), "cudaDeviceSyncFailed"); |
|
|
|
CHECK_CUDA_RET_WITH_ERROR( |
|
|
|
kernel_node_, |
|
|
|
cudaMemcpyAsync(d_x, dout, x_size, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)), |
|
|
|
|