Skip to content

Commit

Permalink
Improve: Call target directly from unsafe_ifunc macro whenever possible.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sewer56 committed Oct 9, 2024
1 parent a26dd5b commit da7bef6
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 57 deletions.
130 changes: 77 additions & 53 deletions src/arch/x86_64/memchr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,7 @@ macro_rules! unsafe_ifunc {
) => {{
#![allow(unused_unsafe)]

use core::sync::atomic::{AtomicPtr, Ordering};

type Fn = *mut ();
type RealFn = $fnty;
static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);

// Define reusable functions for AVX2, SSE2, and fallback.
#[cfg(target_feature = "sse2")]
#[target_feature(enable = "sse2", enable = "avx2")]
unsafe fn find_avx2(
Expand Down Expand Up @@ -103,58 +98,87 @@ macro_rules! unsafe_ifunc {
$hay_end: *const u8,
) -> $retty {
use crate::arch::all::memchr::$memchrty;
$memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
$memchrty::new($($needle),+)
.$memchrfind($hay_start, $hay_end)
}

unsafe fn detect(
$($needle: u8),+,
$hay_start: *const u8,
$hay_end: *const u8,
) -> $retty {
let fun = {
#[cfg(not(target_feature = "sse2"))]
{
debug!(
"no sse2 feature available, using fallback for {}",
stringify!($memchrty),
);
find_fallback as RealFn
}
#[cfg(target_feature = "sse2")]
{
use crate::arch::x86_64::{sse2, avx2};
if avx2::memchr::$memchrty::is_available() {
debug!("chose AVX2 for {}", stringify!($memchrty));
find_avx2 as RealFn
} else if sse2::memchr::$memchrty::is_available() {
debug!("chose SSE2 for {}", stringify!($memchrty));
find_sse2 as RealFn
} else {
debug!("chose fallback for {}", stringify!($memchrty));
find_fallback as RealFn
}
}
};
FN.store(fun as Fn, Ordering::Relaxed);
// SAFETY: The only thing we need to uphold here is the
// `#[target_feature]` requirements. Since we check is_available
// above before using the corresponding implementation, we are
// guaranteed to only call code that is supported on the current
// CPU.
fun($($needle),+, $hay_start, $hay_end)
// If `std` is disabled and CPU features are known at compile-time,
// directly call the appropriate implementation without function pointer indirection.
#[cfg(all(not(feature = "std"), target_feature = "avx2"))]
unsafe {
find_avx2($($needle),+, $hay_start, $hay_end)
}

// SAFETY: By virtue of the caller contract, RealFn is a function
// pointer, which is always safe to transmute with a *mut (). Also,
// since we use $memchrty::is_available, it is guaranteed to be safe
// to call $memchrty::$memchrfind.
#[cfg(all(not(feature = "std"), target_feature = "sse2", not(target_feature = "avx2")))]
unsafe {
let fun = FN.load(Ordering::Relaxed);
core::mem::transmute::<Fn, RealFn>(fun)(
$($needle),+,
$hay_start,
$hay_end,
)
find_sse2($($needle),+, $hay_start, $hay_end)
}

#[cfg(all(not(feature = "std"), not(target_feature = "sse2")))]
unsafe {
find_fallback($($needle),+, $hay_start, $hay_end)
}

// Runtime dispatch, used only when `std` is enabled.
//
// Every configuration without `std` is already covered by exactly one of the
// direct-call arms above (AVX2; SSE2 without AVX2; no SSE2 at all). This arm
// therefore must NOT also match `not(any(avx2, sse2))`: that predicate overlaps
// the `all(not(std), not(sse2))` fallback arm, which would leave two
// result-producing blocks in the expansion instead of one on a no-`std`
// target built without SSE2.
#[cfg(feature = "std")]
{
    use core::sync::atomic::{AtomicPtr, Ordering};

    // `FN` stores the currently selected implementation as an untyped
    // pointer; `RealFn` is the actual function-pointer type it is cast
    // back to before being called.
    type Fn = *mut ();
    type RealFn = $fnty;
    // Starts out pointing at `detect`, so the first call goes through
    // detection; `detect` then overwrites it with the chosen routine.
    static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);

    // Picks an implementation (AVX2, then SSE2, then the fallback), stores
    // it in `FN` for later calls, and forwards the current call to it.
    unsafe fn detect(
        $($needle: u8),+,
        $hay_start: *const u8,
        $hay_end: *const u8,
    ) -> $retty {
        let fun = {
            #[cfg(not(target_feature = "sse2"))]
            {
                debug!(
                    "no sse2 feature available, using fallback for {}",
                    stringify!($memchrty),
                );
                find_fallback as RealFn
            }
            #[cfg(target_feature = "sse2")]
            {
                use crate::arch::x86_64::{sse2, avx2};
                if avx2::memchr::$memchrty::is_available() {
                    debug!("chose AVX2 for {}", stringify!($memchrty));
                    find_avx2 as RealFn
                } else if sse2::memchr::$memchrty::is_available() {
                    debug!("chose SSE2 for {}", stringify!($memchrty));
                    find_sse2 as RealFn
                } else {
                    debug!("chose fallback for {}", stringify!($memchrty));
                    find_fallback as RealFn
                }
            }
        };
        FN.store(fun as Fn, Ordering::Relaxed);
        // SAFETY: The only thing we need to uphold here is the
        // `#[target_feature]` requirements. Since we check is_available
        // above before using the corresponding implementation, we are
        // guaranteed to only call code that is supported on the current
        // CPU.
        fun($($needle),+, $hay_start, $hay_end)
    }

    // SAFETY: By virtue of the caller contract, RealFn is a function
    // pointer, which is always safe to transmute with a *mut (). Also,
    // since we use $memchrty::is_available, it is guaranteed to be safe
    // to call $memchrty::$memchrfind.
    unsafe {
        let fun = FN.load(Ordering::Relaxed);
        core::mem::transmute::<Fn, RealFn>(fun)(
            $($needle),+,
            $hay_start,
            $hay_end
        )
    }
}
}};
}
Expand Down
8 changes: 4 additions & 4 deletions src/memchr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,10 +508,10 @@ unsafe fn memchr_raw(
) -> Option<*const u8> {
#[cfg(target_arch = "x86_64")]
{
// x86_64 does CPU feature detection at runtime in order to use AVX2
// instructions even when the `avx2` feature isn't enabled at compile
// time. This function also handles using a fallback if neither AVX2
// nor SSE2 (unusual) are available.
// When the `std` feature is enabled, x86_64 does CPU feature detection at
// runtime in order to use AVX2 instructions even when the `avx2` target
// feature isn't enabled at compile time. Without `std`, the implementation
// is chosen from the target features known at compile time. Either way,
// this function also handles using a fallback if neither AVX2 nor SSE2
// (unusual) are available.
crate::arch::x86_64::memchr::memchr_raw(needle, start, end)
}
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
Expand Down

0 comments on commit da7bef6

Please sign in to comment.