From c8b4302947192f9df3a5c3a2e46c5c6551b7f4e7 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Mon, 12 Feb 2024 16:34:46 -0500 Subject: [PATCH] Switch to using Kokkos::Experimental::sort_by_key --- CMakeLists.txt | 12 +- src/cluster/ArborX_Dendrogram.hpp | 2 +- .../detail/ArborX_DistributedTreeUtils.hpp | 2 +- src/kokkos_ext/ArborX_KokkosExtSort.hpp | 171 +----------------- src/misc/ArborX_SortUtils.hpp | 2 +- 5 files changed, 10 insertions(+), 179 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28765adb3..defbc40ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,14 +26,12 @@ set_target_properties(ArborX PROPERTIES INTERFACE_COMPILE_FEATURES cxx_std_17) add_dependencies(ArborX record_hash) include(CMakeDependentOption) -cmake_dependent_option(ARBORX_ENABLE_ROCTHRUST "Enable rocThrust support" ON "Kokkos_ENABLE_HIP" OFF) -if(Kokkos_ENABLE_HIP AND ARBORX_ENABLE_ROCTHRUST) - find_package(rocthrust REQUIRED CONFIG) - target_link_libraries(ArborX INTERFACE roc::rocthrust) -endif() -if(Kokkos_ENABLE_HIP AND NOT ARBORX_ENABLE_ROCTHRUST) - message(WARNING "rocThrust is NOT enabled.\nThis will negatively impact performance on AMD GPUs.") +if(Kokkos_ENABLE_HIP) + if (NOT Kokkos_ENABLE_ROCTHRUST) + message(WARNING "rocThrust is NOT enabled.\nThis will negatively impact performance on AMD GPUs.") + endif() + set(ARBORX_ENABLE_ROCTHRUST ${Kokkos_ENABLE_ROCTHRUST}) endif() cmake_dependent_option(ARBORX_ENABLE_ONEDPL "Enable oneDPL support" ON "Kokkos_ENABLE_SYCL" OFF) diff --git a/src/cluster/ArborX_Dendrogram.hpp b/src/cluster/ArborX_Dendrogram.hpp index 292553e01..162897585 100644 --- a/src/cluster/ArborX_Dendrogram.hpp +++ b/src/cluster/ArborX_Dendrogram.hpp @@ -67,7 +67,7 @@ struct Dendrogram splitEdges(exec_space, edges, unweighted_edges, _parent_heights); Kokkos::Profiling::pushRegion("ArborX::Dendrogram::sort_edges"); - KokkosExt::sortByKey(exec_space, _parent_heights, unweighted_edges); + KokkosExt::sort_by_key(exec_space, _parent_heights, unweighted_edges); Kokkos::Profiling::popRegion(); using ConstEdges = diff --git a/src/distributed/detail/ArborX_DistributedTreeUtils.hpp b/src/distributed/detail/ArborX_DistributedTreeUtils.hpp index 683444e09..276655490 100644 --- a/src/distributed/detail/ArborX_DistributedTreeUtils.hpp +++ b/src/distributed/detail/ArborX_DistributedTreeUtils.hpp @@ -254,7 +254,7 @@ void forwardQueriesAndCommunicateResults( // Merge results int const n_predicates = predicates.size(); countResults(space, n_predicates, ids, offset); - KokkosExt::sortByKey(space, ids, values); + KokkosExt::sort_by_key(space, ids, values); Kokkos::Profiling::popRegion(); } diff --git a/src/kokkos_ext/ArborX_KokkosExtSort.hpp b/src/kokkos_ext/ArborX_KokkosExtSort.hpp index f76005e2e..8e5eef7e8 100644 --- a/src/kokkos_ext/ArborX_KokkosExtSort.hpp +++ b/src/kokkos_ext/ArborX_KokkosExtSort.hpp @@ -12,179 +12,12 @@ #ifndef ARBORX_KOKKOS_EXT_SORT_HPP #define ARBORX_KOKKOS_EXT_SORT_HPP -#include // ARBORX_ENABLE_ROCTHRUST - -#include - -#include +#include #include -// clang-format off -#if defined(KOKKOS_ENABLE_CUDA) -# if defined(KOKKOS_COMPILER_CLANG) - -// Older Thrust (or CUB to be more precise) versions use __shfl instead of -// __shfl_sync for clang which was removed in PTX ISA version 6.4, also see -// https://github.com/NVIDIA/cub/pull/170. -#include -#if defined(CUB_VERSION) && (CUB_VERSION < 101100) && !defined(CUB_USE_COOPERATIVE_GROUPS) -#define CUB_USE_COOPERATIVE_GROUPS -#endif - -// Some versions of Clang fail to compile Thrust, failing with errors like -// this: -// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: -// error: use of undeclared identifier 'va_printf' -// The exact combination of versions for Clang and Thrust (or CUDA) for this -// failure was not investigated, however even very recent version combination -// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. -// -// Defining _CubLog here allows us to avoid that code path, however disabling -// some debugging diagnostics -// -// If _CubLog is already defined, we save it into ARBORX_CubLog_save, and -// restore it at the end -# ifdef _CubLog -# define ARBORX_CubLog_save _CubLog -# endif -# define _CubLog -# include -# include -# undef _CubLog -# ifdef ARBORX_CubLog_save -# define _CubLog ARBORX_CubLog_save -# undef ARBORX_CubLog_save -# endif -# else // #if defined(KOKKOS_COMPILER_CLANG) -# include -# include -# endif // #if defined(KOKKOS_COMPILER_CLANG) -#endif // #if defined(KOKKOS_ENABLE_CUDA) -// clang-format on - -#if defined(KOKKOS_ENABLE_HIP) && defined(ARBORX_ENABLE_ROCTHRUST) -#include -#include -#endif - -#if defined(KOKKOS_ENABLE_SYCL) && defined(ARBORX_ENABLE_ONEDPL) -#include -#include -#include -#endif - namespace ArborX::Details::KokkosExt { - -template -void sortByKey(ExecutionSpace const &space, Keys &keys, Values &values) -{ - Kokkos::Profiling::ScopedRegion guard("ArborX::KokkosExt::sortByKey::Kokkos"); - - static_assert(Kokkos::is_view::value); - static_assert(Kokkos::is_view::value); - static_assert(Keys::rank() == 1); - static_assert(Values::rank() == 1); - static_assert(KokkosExt::is_accessible_from::value); - static_assert(KokkosExt::is_accessible_from::value); - auto const n = keys.size(); - ARBORX_ASSERT(values.size() == n); - - if (n == 0) - return; - - auto [min_val, max_val] = minmax_reduce(space, keys); - if (min_val == max_val) - return; - - using SizeType = unsigned int; - using CompType = Kokkos::BinOp1D; - - Kokkos::BinSort - bin_sort(space, keys, CompType(n / 2, min_val, max_val), true); - bin_sort.create_permute_vector(space); - bin_sort.sort(space, keys); - bin_sort.sort(space, values); -} - -#if defined(KOKKOS_ENABLE_CUDA) || \ - (defined(KOKKOS_ENABLE_HIP) && defined(ARBORX_ENABLE_ROCTHRUST)) -template -void sortByKey( -#if defined(KOKKOS_ENABLE_CUDA) - Kokkos::Cuda const &space, -#else - Kokkos::HIP const &space, -#endif - Keys &keys, Values &values) -{ - Kokkos::Profiling::ScopedRegion guard("ArborX::KokkosExt::sortByKey::Thrust"); - - using ExecutionSpace = std::decay_t; - static_assert(Kokkos::is_view::value); - static_assert(Kokkos::is_view::value); - static_assert(Keys::rank() == 1); - static_assert(Values::rank() == 1); - static_assert(KokkosExt::is_accessible_from::value); - static_assert(KokkosExt::is_accessible_from::value); - auto const n = keys.size(); - ARBORX_ASSERT(values.size() == n); - - if (n == 0) - return; - -#if defined(KOKKOS_ENABLE_CUDA) - auto const execution_policy = thrust::cuda::par.on(space.cuda_stream()); -#else - auto const execution_policy = thrust::hip::par.on(space.hip_stream()); -#endif - - thrust::sort_by_key(execution_policy, keys.data(), keys.data() + n, - values.data()); -} -#endif - -#if defined(KOKKOS_ENABLE_SYCL) && defined(ARBORX_ENABLE_ONEDPL) -template -void sortByKey(Kokkos::Experimental::SYCL const &space, Keys &keys, - Values &values) -{ - Kokkos::Profiling::ScopedRegion guard("ArborX::KokkosExt::sortByKey::OneDPL"); - - using ExecutionSpace = std::decay_t; - static_assert(Kokkos::is_view::value); - static_assert(Kokkos::is_view::value); - static_assert(Keys::rank() == 1); - static_assert(Values::rank() == 1); - static_assert(KokkosExt::is_accessible_from::value); - static_assert(KokkosExt::is_accessible_from::value); - auto const n = keys.size(); - ARBORX_ASSERT(values.size() == n); - - if (n == 0) - return; - - oneapi::dpl::execution::device_policy policy( - *space.impl_internal_space_instance()->m_queue); -#if ONEDPL_VERSION_MAJOR > 2022 || \ - (ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2) - oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data()); -#else - auto zipped_begin = - oneapi::dpl::make_zip_iterator(keys.data(), values.data()); - oneapi::dpl::sort( - policy, zipped_begin, zipped_begin + n, - [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); -#endif +using Kokkos::Experimental::sort_by_key; } -#endif - -} // namespace ArborX::Details::KokkosExt #endif diff --git a/src/misc/ArborX_SortUtils.hpp b/src/misc/ArborX_SortUtils.hpp index d68c42347..98bd69e22 100644 --- a/src/misc/ArborX_SortUtils.hpp +++ b/src/misc/ArborX_SortUtils.hpp @@ -35,7 +35,7 @@ auto sortObjects(ExecutionSpace const &space, ViewType &view) view.extent(0)); KokkosExt::iota(space, permute); - KokkosExt::sortByKey(space, view, permute); + KokkosExt::sort_by_key(space, view, permute); Kokkos::Profiling::popRegion();