From 1cd178127a98e7c0e6fca17523acd35c4e17594b Mon Sep 17 00:00:00 2001 From: Miroslav Stoyanov Date: Wed, 23 Oct 2024 12:05:43 -0400 Subject: [PATCH 1/4] suppress warnings about virtuals and a dummy null --- include/heffte_r2r_executor.h | 7 ++++++- test/test_units_nompi.cpp | 14 ++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/heffte_r2r_executor.h b/include/heffte_r2r_executor.h index 73b7afa..a8ba33c 100644 --- a/include/heffte_r2r_executor.h +++ b/include/heffte_r2r_executor.h @@ -143,7 +143,7 @@ struct cpu_cos1_pre_pos_processor{ static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){ for (int i = 0; i < length-1; i++){ fft_signal[2*i] = input[i]; - fft_signal[2*i+1] = 0.0; + fft_signal[2*i+1] = 0.0; } for (int i = 1; i < length; i++){ fft_signal[4*(length-1)-2*i] = input[i]; @@ -190,6 +190,11 @@ struct cpu_sin1_pre_pos_processor{}; */ template struct real2real_executor : public executor_base{ + //! \brief Will not be used, suppresses compiler warnings + using executor_base::forward; + //! \brief Will not be used, suppresses compiler warnings + using executor_base::backward; + //! \brief Construct a plan for batch 1D transforms. template real2real_executor(typename backend::device_instance::location>::stream_type cstream, box3d const box, int dimension) : diff --git a/test/test_units_nompi.cpp b/test/test_units_nompi.cpp index 382b1e5..76934ef 100644 --- a/test/test_units_nompi.cpp +++ b/test/test_units_nompi.cpp @@ -597,7 +597,13 @@ void test_in_node_transpose(){ auto active_intput = test_traits::load(input); vcontainer result(24); - heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_intput.data(), result.data(), nullptr); + + // when doing out-of-place transpose we do not need a workspace and can use a nullptr instead, + // but the in/out-place is checked at runtime and one of the branches that is never used + // will still be compiled with the hard-coded nullptr, to suppress the warning we are using a dummy-null dnull + scalar_type *dnull = result.data(); + + heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_intput.data(), result.data(), dnull); sassert(match(result, reference)); @@ -605,7 +611,7 @@ void test_in_node_transpose(){ box3d<> destination2(std::array{0, 0, 0}, std::array{1, 2, 3}, std::array{2, 1, 0}); plans.clear(); heffte::compute_overlap_map_transpose_pack(0, 1, destination2, {inbox}, proc, offset, sizes, plans); - heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_intput.data(), result.data(), nullptr); + heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_intput.data(), result.data(), dnull); reference = {1.0, 7.0, 13.0, 19.0, 3.0, 9.0, 15.0, 21.0, 5.0, 11.0, 17.0, 23.0, 2.0, 8.0, 14.0, 20.0, 4.0, 10.0, 16.0, 22.0, 6.0, 12.0, 18.0, 24.0}; @@ -615,14 +621,14 @@ void test_in_node_transpose(){ plans.clear(); heffte::compute_overlap_map_transpose_pack(0, 1, inbox, {destination2}, proc, offset, sizes, plans); auto active_reference = test_traits::load(reference); - heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_reference.data(), result.data(), nullptr); + heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_reference.data(), result.data(), dnull); sassert(match(result, input)); // test 3, transpose the data to order (0, 2, 1) box3d<> destination3(std::array{0, 0, 0}, std::array{1, 2, 3}, std::array{0, 2, 1}); plans.clear(); heffte::compute_overlap_map_transpose_pack(0, 1, destination3, {inbox}, proc, offset, sizes, plans); - heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_intput.data(), result.data(), nullptr); + heffte::reshape3d_transpose(device.stream(), plans[0]).apply(1, active_intput.data(), result.data(), dnull); reference = {1.0, 2.0, 7.0, 8.0, 13.0, 14.0, 19.0, 20.0, 3.0, 4.0, 9.0, 10.0, 15.0, 16.0, 21.0, 22.0, From 0a9ba2f4ee4ee75c55ece4248c990cb215a6b285 Mon Sep 17 00:00:00 2001 From: Miroslav Stoyanov Date: Wed, 23 Oct 2024 12:13:06 -0400 Subject: [PATCH 2/4] post-install test using correct mpirun --- test/post_install_test.cmake.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/post_install_test.cmake.sh b/test/post_install_test.cmake.sh index 853a998..6c83787 100644 --- a/test/post_install_test.cmake.sh +++ b/test/post_install_test.cmake.sh @@ -21,6 +21,7 @@ fi -DCMAKE_CXX_FLAGS="@CMAKE_CXX_FLAGS@" \ -DHeffte_DIR=@CMAKE_INSTALL_FULL_LIBDIR@/cmake/Heffte \ -DMPI_CXX_COMPILER="@MPI_CXX_COMPILER@" \ + -DMPIEXEC_EXECUTABLE="@MPIEXEC_EXECUTABLE@" \ $heffte_mpic_compiler \ $heffte_mpif_compiler \ -DMPIEXEC_NUMPROC_FLAG="@MPIEXEC_NUMPROC_FLAG@" \ From 3de7ee218d6cdde8a688bd5083c79b6116b82a3d Mon Sep 17 00:00:00 2001 From: Miroslav Stoyanov Date: Wed, 23 Oct 2024 12:16:41 -0400 Subject: [PATCH 3/4] fixing no-discard warnings in rocm --- test/test_common.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test_common.h b/test/test_common.h index 84ba870..a7aaf4a 100644 --- a/test/test_common.h +++ b/test/test_common.h @@ -202,11 +202,15 @@ using gpu_backend = heffte::backend::rocfft; hipStream_t make_stream(backend::rocfft){ hipStream_t result; - hipStreamCreateWithFlags(&result, hipStreamNonBlocking); + rocm::check_error( hipStreamCreateWithFlags(&result, hipStreamNonBlocking), "hipStreamCreateWithFlags()"); return result; } -void sync_stream(hipStream_t){ hipDeviceSynchronize(); } -void free_stream(hipStream_t stream){ hipStreamDestroy(stream); } +void sync_stream(hipStream_t){ + rocm::check_error( hipDeviceSynchronize(), "hipDeviceSynchronize()"); +} +void free_stream(hipStream_t stream){ + rocm::check_error( hipStreamDestroy(stream), "hipStreamDestroy()"); +} #endif #ifdef Heffte_ENABLE_ONEAPI using gpu_backend = heffte::backend::onemkl; From 18f41fed6d821b261ebc08a7f9d9af83fbff035f Mon Sep 17 00:00:00 2001 From: Miroslav Stoyanov Date: Wed, 23 Oct 2024 13:49:43 -0400 Subject: [PATCH 4/4] fix oneapi deprecation warning --- include/heffte_backend_oneapi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/heffte_backend_oneapi.h b/include/heffte_backend_oneapi.h index baf81d0..09972b7 100644 --- a/include/heffte_backend_oneapi.h +++ b/include/heffte_backend_oneapi.h @@ -508,15 +508,15 @@ class onemkl_executor : public executor_base{ }else if (size2 == 0){ plan.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, (MKL_LONG) howmanyffts); plan.set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_INPLACE); - plan.set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, embed.data()); - plan.set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, embed.data()); + plan.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES, embed.data()); + plan.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES, embed.data()); plan.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, (MKL_LONG) dist); plan.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, (MKL_LONG) dist); }else{ plan.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, (MKL_LONG) howmanyffts); plan.set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_INPLACE); - plan.set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, embed.data()); - plan.set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, embed.data()); + plan.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES, embed.data()); + plan.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES, embed.data()); plan.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, (MKL_LONG) dist); plan.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, (MKL_LONG) dist); } @@ -620,8 +620,8 @@ class onemkl_executor_r2c : public executor_base{ plan.set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_NOT_INPLACE); plan.set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX); MKL_LONG slstride[] = {0, static_cast(stride)}; - plan.set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, slstride); - plan.set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, slstride); + plan.set_value(oneapi::mkl::dft::config_param::FWD_STRIDES, slstride); + plan.set_value(oneapi::mkl::dft::config_param::BWD_STRIDES, slstride); plan.set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, (MKL_LONG) rdist); plan.set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, (MKL_LONG) cdist); plan.commit(q);