Skip to content

Commit

Permalink
Merge pull request #74 from mkstoyanov/switch_to_cuda_native
Browse files Browse the repository at this point in the history
* update default cuda-arch to `native` when using newer cmake
  • Loading branch information
mkstoyanov authored Feb 12, 2025
2 parents c61c772 + bcd131e commit b7afa48
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 3 deletions.
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ if (Heffte_ENABLE_FORTRAN)
endif()

# Choose a default for CMAKE_CUDA_ARCHITECTURES, but only when CUDA is enabled
# and the user has not already defined the variable (e.g., on the command line).
if (Heffte_ENABLE_CUDA AND NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# NOTE(review): this listing shows the diff without +/- markers; the next line
# appears to be the pre-change default that the commit replaces with the
# version check below -- confirm against the repository before relying on it.
set(CMAKE_CUDA_ARCHITECTURES "OFF" CACHE STRING "CUDA architectures to compile, e.g., -DCMAKE_CUDA_ARCHITECTURES=70;72")
# CMake older than 3.24 keeps the "OFF" default, newer CMake defaults to "native".
if (CMAKE_VERSION VERSION_LESS 3.24)
set(CMAKE_CUDA_ARCHITECTURES "OFF" CACHE STRING "CUDA architectures to compile, e.g., -DCMAKE_CUDA_ARCHITECTURES=70;72")
else()
set(CMAKE_CUDA_ARCHITECTURES "native" CACHE STRING "CUDA architectures to compile, e.g., -DCMAKE_CUDA_ARCHITECTURES=70;72")
endif()
endif()
if (Heffte_ENABLE_ROCM AND NOT DEFINED CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES "OFF" CACHE STRING "HIP architectures to compile, e.g., -DCMAKE_HIP_ARCHITECTURES=gfx803;gfx1100")
Expand Down
5 changes: 4 additions & 1 deletion src/heffte_backend_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ __global__ void cos1_post_forward_kernel(int N, scalar_type const *fft_signal, s

}

// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I.
// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I.
// However, the kernel has slight changes to adapt to the c2r transform.
// set imaginary parts to zero; even symmetry
// (a b c) -> (a,0 b,0 c,0 b,0 a,0)
Expand Down Expand Up @@ -470,8 +470,11 @@ heffte_instantiate_packers(long long)

// Launches the simple_scal kernel on the given stream for num_entries values of data.
// The scaling factor is received as double and converted to scalar_type before the launch.
// (Presumably the kernel multiplies the entries in place -- the kernel body is not visible here.)
template<typename scalar_type, typename index>
void scale_data(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor){
    // an empty input/output box is allowed: nothing to scale, so no kernel is launched
    if (num_entries == 0) { return; }

    scalar_type const factor = static_cast<scalar_type>(scale_factor);
    thread_grid_1d launch(num_entries, max_threads);
    simple_scal<scalar_type, max_threads><<<launch.blocks, launch.threads, 0, stream>>>(num_entries, data, factor);

    // hand the launch status to check_error; cudaPeekAtLastError() reads the error without clearing it
    check_error( cudaPeekAtLastError(), "scale kernel launch" );
}

template void scale_data<float, int>(cudaStream_t, int num_entries, float *data, double scale_factor);
Expand Down
4 changes: 3 additions & 1 deletion src/heffte_backend_rocm.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ __global__ void cos1_post_forward_kernel(int N, scalar_type const *fft_signal, s

}

// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I.
// IDCT-I backward kernel for DCT-I. The transform itself doesn't change, since (DCT-I)^-1=DCT-I.
// However, the kernel has slight changes to adapt to the c2r transform.
// set imaginary parts to zero; even symmetry
// (a b c) -> (a,0 b,0 c,0 b,0 a,0)
Expand Down Expand Up @@ -473,6 +473,8 @@ heffte_instantiate_packers(long long)

// Launches the simple_scal kernel on the given stream for num_entries values of data.
// The scaling factor is received as double and converted to scalar_type before the launch.
// (Presumably the kernel multiplies the entries in place -- the kernel body is not visible here.)
// NOTE(review): the launch status is not checked here, while the CUDA version of scale_data
// checks it after the launch -- confirm whether a matching check is wanted.
template<typename scalar_type, typename index>
void scale_data(hipStream_t stream, index num_entries, scalar_type *data, double scale_factor){
    // an empty input/output box is allowed: nothing to scale, so no kernel is launched
    if (num_entries == 0) { return; }

    scalar_type const factor = static_cast<scalar_type>(scale_factor);
    thread_grid_1d launch(num_entries, max_threads);
    simple_scal<scalar_type, max_threads><<<launch.blocks, launch.threads, 0, stream>>>(num_entries, data, factor);
}
Expand Down

0 comments on commit b7afa48

Please sign in to comment.