diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e51cda..b449cc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,11 @@ if (Heffte_ENABLE_FORTRAN) endif() if (Heffte_ENABLE_CUDA AND NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES "OFF" CACHE STRING "CUDA architectures to compile, e.g., -DCMAKE_CUDA_ARCHITECTURES=70;72") + if (CMAKE_VERSION VERSION_LESS 3.24) + set(CMAKE_CUDA_ARCHITECTURES "OFF" CACHE STRING "CUDA architectures to compile, e.g., -DCMAKE_CUDA_ARCHITECTURES=70;72") + else() + set(CMAKE_CUDA_ARCHITECTURES "native" CACHE STRING "CUDA architectures to compile, e.g., -DCMAKE_CUDA_ARCHITECTURES=70;72") + endif() endif() if (Heffte_ENABLE_ROCM AND NOT DEFINED CMAKE_HIP_ARCHITECTURES) set(CMAKE_HIP_ARCHITECTURES "OFF" CACHE STRING "HIP architectures to compile, e.g., -DCMAKE_HIP_ARCHITECTURES=gfx803;gfx1100") diff --git a/src/heffte_backend_cuda.cu b/src/heffte_backend_cuda.cu index 9b908a9..ec3f30d 100644 --- a/src/heffte_backend_cuda.cu +++ b/src/heffte_backend_cuda.cu @@ -295,7 +295,7 @@ __global__ void cos1_post_forward_kernel(int N, scalar_type const *fft_signal, s } -// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I. +// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I. // However, the kernel has slight changes to adapt to the c2r transform. // set imaginary parts to zero; even symmetry // (a b c) -> (a,0 b,0 c,0 b,0 a,0) @@ -470,8 +470,11 @@ heffte_instantiate_packers(long long) template void scale_data(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor){ + if (num_entries == 0) // empty input/output box is allowed, do nothing + return; thread_grid_1d grid(num_entries, max_threads); simple_scal<<>>(num_entries, data, static_cast(scale_factor)); + check_error( cudaPeekAtLastError(), "scale kernel launch" ); } template void scale_data(cudaStream_t, int num_entries, float *data, double scale_factor); diff --git a/src/heffte_backend_rocm.hip.cpp b/src/heffte_backend_rocm.hip.cpp index 22bd2ef..e705e46 100644 --- a/src/heffte_backend_rocm.hip.cpp +++ b/src/heffte_backend_rocm.hip.cpp @@ -296,7 +296,7 @@ __global__ void cos1_post_forward_kernel(int N, scalar_type const *fft_signal, s } -// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I. +// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I. // However, the kernel has slight changes to adapt to the c2r transform. // set imaginary parts to zero; even symmetry // (a b c) -> (a,0 b,0 c,0 b,0 a,0) @@ -473,6 +473,8 @@ heffte_instantiate_packers(long long) template void scale_data(hipStream_t stream, index num_entries, scalar_type *data, double scale_factor){ + if (num_entries == 0) // empty input/output box is allowed, do nothing + return; thread_grid_1d grid(num_entries, max_threads); simple_scal<<>>(num_entries, data, static_cast(scale_factor)); }