Skip to content

Commit

Permalink
Kernel: Clean up sys$futex and add support for cross-process futexes
Browse files Browse the repository at this point in the history
  • Loading branch information
IdanHo authored and awesomekling committed Jul 21, 2022
1 parent 55c7496 commit 9db1088
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 70 deletions.
3 changes: 2 additions & 1 deletion Kernel/API/POSIX/futex.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ extern "C" {
#define FUTEX_WAKE_BITSET 10

#define FUTEX_CLOCK_REALTIME (1 << 8)
#define FUTEX_CMD_MASK ~(FUTEX_CLOCK_REALTIME)
#define FUTEX_PRIVATE_FLAG (1 << 9)
#define FUTEX_CMD_MASK ~(FUTEX_CLOCK_REALTIME | FUTEX_PRIVATE_FLAG)

#define FUTEX_BITSET_MATCH_ANY 0xffffffff

Expand Down
30 changes: 26 additions & 4 deletions Kernel/Process.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,22 @@ enum class VeilState {
Locked,
};

using FutexQueues = HashMap<FlatPtr, NonnullRefPtr<FutexQueue>>;
static constexpr FlatPtr futex_key_private_flag = 0b1;
union GlobalFutexKey {
struct {
Memory::VMObject const* vmobject;
FlatPtr offset;
} shared;
struct {
Memory::AddressSpace const* address_space;
FlatPtr user_address;
} private_;
struct {
FlatPtr parent;
FlatPtr offset;
} raw;
};
static_assert(sizeof(GlobalFutexKey) == (sizeof(FlatPtr) * 2));

struct LoadResult;

Expand Down Expand Up @@ -575,6 +590,8 @@ class Process final
void clear_signal_handlers_for_exec();
void clear_futex_queues_on_exec();

ErrorOr<GlobalFutexKey> get_futex_key(FlatPtr user_address, bool shared);

ErrorOr<void> remap_range_as_stack(FlatPtr address, size_t size);

ErrorOr<FlatPtr> read_impl(int fd, Userspace<u8*> buffer, size_t size);
Expand Down Expand Up @@ -829,9 +846,6 @@ class Process final

OwnPtr<PerformanceEventBuffer> m_perf_event_buffer;

FutexQueues m_futex_queues;
Spinlock m_futex_lock;

// This member is used in the implementation of ptrace's PT_TRACEME flag.
// If it is set to true, the process will stop at the next execve syscall
// and wait for a tracer to attach.
Expand Down Expand Up @@ -1040,3 +1054,11 @@ struct AK::Formatter<Kernel::Process> : AK::Formatter<FormatString> {
return AK::Formatter<FormatString>::format(builder, "{}({})"sv, value.name(), value.pid().value());
}
};

namespace AK {
template<>
struct Traits<Kernel::GlobalFutexKey> : public GenericTraits<Kernel::GlobalFutexKey> {
static unsigned hash(Kernel::GlobalFutexKey const& futex_key) { return pair_int_hash(ptr_hash(futex_key.raw.parent), ptr_hash(futex_key.raw.offset)); }
static bool equals(Kernel::GlobalFutexKey const& a, Kernel::GlobalFutexKey const& b) { return a.raw.parent == b.raw.parent && a.raw.offset == b.raw.offset; }
};
};
165 changes: 118 additions & 47 deletions Kernel/Syscalls/futex.cpp
Original file line number Diff line number Diff line change
@@ -1,25 +1,89 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <[email protected]>
* Copyright (c) 2022, Idan Horowitz <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/

#include <AK/Singleton.h>
#include <Kernel/Debug.h>
#include <Kernel/Memory/InodeVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Process.h>

namespace Kernel {

static Singleton<SpinlockProtected<HashMap<GlobalFutexKey, NonnullRefPtr<FutexQueue>>>> s_global_futex_queues;

void Process::clear_futex_queues_on_exec()
{
SpinlockLocker lock(m_futex_lock);
for (auto& it : m_futex_queues) {
bool did_wake_all;
it.value->wake_all(did_wake_all);
VERIFY(did_wake_all); // No one should be left behind...
s_global_futex_queues->with([this](auto& queues) {
auto const* address_space = &this->address_space();
queues.remove_all_matching([address_space](auto& futex_key, auto& futex_queue) {
if ((futex_key.raw.offset & futex_key_private_flag) == 0)
return false;
if (futex_key.private_.address_space != address_space)
return false;
bool did_wake_all;
futex_queue->wake_all(did_wake_all);
VERIFY(did_wake_all); // No one should be left behind...
return true;
});
});
}

ErrorOr<GlobalFutexKey> Process::get_futex_key(FlatPtr user_address, bool shared)
{
if (user_address & 0b11) // user_address points to a u32, so must be 4byte aligned
return EINVAL;

auto range = Memory::VirtualRange { VirtualAddress(user_address), sizeof(u32) };

if (!Kernel::Memory::is_user_range(range))
return EFAULT;

if (!shared) { // If this is thread-shared, we can skip searching the matching region
return GlobalFutexKey {
.private_ = {
.address_space = &address_space(),
.user_address = user_address | futex_key_private_flag,
}
};
}

auto* matching_region = address_space().find_region_containing(range);
if (!matching_region)
return EFAULT;

// The user wants to share this futex, but if the address doesn't point to a shared resource, there's not
// much sharing to be done, so let's mark this as private
if (!matching_region->is_shared()) {
return GlobalFutexKey {
.private_ = {
.address_space = &address_space(),
.user_address = user_address | futex_key_private_flag,
}
};
}
m_futex_queues.clear();

// This address is backed by a shared VMObject, if it's an AnonymousVMObject, it can be shared between processes
// via forking, and shared regions that are cloned during a fork retain their original AnonymousVMObject.
// On the other hand, if it's a SharedInodeVMObject, it can be shared by two processes mapping the same file as
// MAP_SHARED, but since they are deduplicated based on the inode, in all cases the VMObject pointer should be
// a unique global identifier.
// NOTE: This assumes that a program will not unmap the only region keeping the vmobject alive while waiting on it,
// if it does, it will get stuck waiting forever until interrupted by a signal, but since that use case is defined as
// a programmer error, we are fine with it.

auto const& vmobject = matching_region->vmobject();
if (vmobject.is_inode())
VERIFY(vmobject.is_shared_inode());

return GlobalFutexKey {
.shared = {
.vmobject = &vmobject,
.offset = matching_region->offset_in_vmobject_from_vaddr(range.base()) }
};
}

ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> user_params)
Expand All @@ -35,6 +99,8 @@ ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> u
return ENOSYS;
}

bool shared = (params.futex_op & FUTEX_PRIVATE_FLAG) == 0;

switch (cmd) {
case FUTEX_WAIT:
case FUTEX_WAIT_BITSET:
Expand All @@ -56,41 +122,44 @@ ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> u
}
}

auto find_futex_queue = [&](FlatPtr user_address, bool create_if_not_found, bool* did_create = nullptr) -> ErrorOr<RefPtr<FutexQueue>> {
auto find_futex_queue = [&](GlobalFutexKey futex_key, bool create_if_not_found, bool* did_create = nullptr) -> ErrorOr<RefPtr<FutexQueue>> {
VERIFY(!create_if_not_found || did_create != nullptr);
auto it = m_futex_queues.find(user_address);
if (it != m_futex_queues.end())
return it->value;
if (create_if_not_found) {
return s_global_futex_queues->with([&](auto& queues) -> ErrorOr<RefPtr<FutexQueue>> {
auto it = queues.find(futex_key);
if (it != queues.end())
return it->value;
if (!create_if_not_found)
return nullptr;
*did_create = true;
auto futex_queue = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) FutexQueue));
auto result = TRY(m_futex_queues.try_set(user_address, futex_queue));
auto result = TRY(queues.try_set(futex_key, futex_queue));
VERIFY(result == AK::HashSetResult::InsertedNewEntry);
return futex_queue;
}
return nullptr;
});
};

auto remove_futex_queue = [&](FlatPtr user_address) {
if (auto it = m_futex_queues.find(user_address); it != m_futex_queues.end()) {
if (it->value->try_remove()) {
m_futex_queues.remove(it);
}
}
auto remove_futex_queue = [&](GlobalFutexKey futex_key) {
return s_global_futex_queues->with([&](auto& queues) {
auto it = queues.find(futex_key);
if (it == queues.end())
return;
if (it->value->try_remove())
queues.remove(it);
});
};

auto do_wake = [&](FlatPtr user_address, u32 count, Optional<u32> bitmask) -> ErrorOr<int> {
auto do_wake = [&](FlatPtr user_address, u32 count, Optional<u32> const& bitmask) -> ErrorOr<int> {
if (count == 0)
return 0;
SpinlockLocker locker(m_futex_lock);
auto futex_queue = TRY(find_futex_queue(user_address, false));
auto futex_key = TRY(get_futex_key(user_address, shared));
auto futex_queue = TRY(find_futex_queue(futex_key, false));
if (!futex_queue)
return 0;
bool is_empty;
u32 woke_count = futex_queue->wake_n(count, bitmask, is_empty);
if (is_empty) {
// If there are no more waiters, we want to get rid of the futex!
remove_futex_queue(user_address);
remove_futex_queue(futex_key);
}
return (int)woke_count;
};
Expand All @@ -101,6 +170,7 @@ ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> u
auto do_wait = [&](u32 bitset) -> ErrorOr<FlatPtr> {
bool did_create;
RefPtr<FutexQueue> futex_queue;
auto futex_key = TRY(get_futex_key(user_address, shared));
do {
auto user_value = user_atomic_load_relaxed(params.userspace_address);
if (!user_value.has_value())
Expand All @@ -111,9 +181,8 @@ ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> u
}
atomic_thread_fence(AK::MemoryOrder::memory_order_acquire);

SpinlockLocker locker(m_futex_lock);
did_create = false;
futex_queue = TRY(find_futex_queue(user_address, true, &did_create));
futex_queue = TRY(find_futex_queue(futex_key, true, &did_create));
VERIFY(futex_queue);
// We need to try again if we didn't create this queue and the existing queue
// was removed before we were able to queue an imminent wait.
Expand All @@ -124,10 +193,9 @@ ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> u

Thread::BlockResult block_result = futex_queue->wait_on(timeout, bitset);

SpinlockLocker locker(m_futex_lock);
if (futex_queue->is_empty_and_no_imminent_waits()) {
// If there are no more waiters, we want to get rid of the futex!
remove_futex_queue(user_address);
remove_futex_queue(futex_key);
}
if (block_result == Thread::BlockResult::InterruptedByTimeout) {
return ETIMEDOUT;
Expand All @@ -143,25 +211,28 @@ ErrorOr<FlatPtr> Process::sys$futex(Userspace<Syscall::SC_futex_params const*> u
return EAGAIN;
atomic_thread_fence(AK::MemoryOrder::memory_order_acquire);

int woken_or_requeued = 0;
SpinlockLocker locker(m_futex_lock);
if (auto futex_queue = TRY(find_futex_queue(user_address, false))) {
RefPtr<FutexQueue> target_futex_queue;
bool is_empty, is_target_empty;
woken_or_requeued = TRY(futex_queue->wake_n_requeue(
params.val, [&]() -> ErrorOr<FutexQueue*> {
// NOTE: futex_queue's lock is being held while this callback is called
// The reason we're doing this in a callback is that we don't want to always
// create a target queue, only if we actually have anything to move to it!
target_futex_queue = TRY(find_futex_queue(user_address2, true));
return target_futex_queue.ptr();
},
params.val2, is_empty, is_target_empty));
if (is_empty)
remove_futex_queue(user_address);
if (is_target_empty && target_futex_queue)
remove_futex_queue(user_address2);
}
auto futex_key = TRY(get_futex_key(user_address, shared));
auto futex_queue = TRY(find_futex_queue(futex_key, false));
if (!futex_queue)
return 0;

RefPtr<FutexQueue> target_futex_queue;
bool is_empty = false;
bool is_target_empty = false;
auto futex_key2 = TRY(get_futex_key(user_address2, shared));
auto woken_or_requeued = TRY(futex_queue->wake_n_requeue(
params.val, [&]() -> ErrorOr<FutexQueue*> {
// NOTE: futex_queue's lock is being held while this callback is called
// The reason we're doing this in a callback is that we don't want to always
// create a target queue, only if we actually have anything to move to it!
target_futex_queue = TRY(find_futex_queue(futex_key2, true));
return target_futex_queue.ptr();
},
params.val2, is_empty, is_target_empty));
if (is_empty)
remove_futex_queue(futex_key);
if (is_target_empty && target_futex_queue)
remove_futex_queue(futex_key2);
return woken_or_requeued;
};

Expand Down
6 changes: 3 additions & 3 deletions Userland/Libraries/LibC/pthread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ static int rwlock_rdlock_maybe_timed(u32* lockp, const struct timespec* timeout

// Seems like someone is writing (or is interested in writing and we let them have the lock)
// wait until they're done.
auto rc = futex(lockp, FUTEX_WAIT_BITSET, current, timeout, nullptr, reader_wake_mask);
auto rc = futex(lockp, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG, current, timeout, nullptr, reader_wake_mask);
if (rc < 0 && errno == ETIMEDOUT && timeout) {
return value_if_timeout;
}
Expand Down Expand Up @@ -703,7 +703,7 @@ static int rwlock_wrlock_maybe_timed(pthread_rwlock_t* lockval_p, const struct t

// Seems like someone is writing (or is interested in writing and we let them have the lock)
// wait until they're done.
auto rc = futex(lockp, FUTEX_WAIT_BITSET, current, timeout, nullptr, writer_wake_mask);
auto rc = futex(lockp, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG, current, timeout, nullptr, writer_wake_mask);
if (rc < 0 && errno == ETIMEDOUT && timeout) {
return value_if_timeout;
}
Expand Down Expand Up @@ -794,7 +794,7 @@ int pthread_rwlock_unlock(pthread_rwlock_t* lockval_p)
auto desired = current & ~(writer_locked_mask | writer_intent_mask);
AK::atomic_store(lockp, desired, AK::MemoryOrder::memory_order_release);
// Then wake both readers and writers, if any.
auto rc = futex(lockp, FUTEX_WAKE_BITSET, current, nullptr, nullptr, (current & writer_wake_mask) | reader_wake_mask);
auto rc = futex(lockp, FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, current, nullptr, nullptr, (current & writer_wake_mask) | reader_wake_mask);
if (rc < 0)
return errno;
return 0;
Expand Down
6 changes: 3 additions & 3 deletions Userland/Libraries/LibC/pthread_cond.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ int pthread_cond_timedwait(pthread_cond_t* cond, pthread_mutex_t* mutex, const s
// value might change as soon as we unlock it.
u32 value = AK::atomic_fetch_or(&cond->value, NEED_TO_WAKE_ONE | NEED_TO_WAKE_ALL, AK::memory_order_release) | NEED_TO_WAKE_ONE | NEED_TO_WAKE_ALL;
pthread_mutex_unlock(mutex);
int rc = futex_wait(&cond->value, value, abstime, cond->clockid);
int rc = futex_wait(&cond->value, value, abstime, cond->clockid, false);
if (rc < 0 && errno != EAGAIN)
return errno;

Expand Down Expand Up @@ -129,7 +129,7 @@ int pthread_cond_signal(pthread_cond_t* cond)
if (!(value & NEED_TO_WAKE_ONE)) [[likely]]
return 0;
// ...try to wake someone...
int rc = futex_wake(&cond->value, 1);
int rc = futex_wake(&cond->value, 1, false);
VERIFY(rc >= 0);
// ...and if we have woken someone, put the flag back.
if (rc > 0)
Expand All @@ -152,7 +152,7 @@ int pthread_cond_broadcast(pthread_cond_t* cond)
pthread_mutex_t* mutex = AK::atomic_load(&cond->mutex, AK::memory_order_relaxed);
VERIFY(mutex);

int rc = futex(&cond->value, FUTEX_REQUEUE, 1, nullptr, &mutex->lock, INT_MAX);
int rc = futex(&cond->value, FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG, 1, nullptr, &mutex->lock, INT_MAX);
VERIFY(rc >= 0);
return 0;
}
6 changes: 3 additions & 3 deletions Userland/Libraries/LibC/pthread_integration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ int pthread_mutex_lock(pthread_mutex_t* mutex)
value = AK::atomic_exchange(&mutex->lock, MUTEX_LOCKED_NEED_TO_WAKE, AK::memory_order_acquire);

while (value != MUTEX_UNLOCKED) {
futex_wait(&mutex->lock, value, nullptr, 0);
futex_wait(&mutex->lock, value, nullptr, 0, false);
value = AK::atomic_exchange(&mutex->lock, MUTEX_LOCKED_NEED_TO_WAKE, AK::memory_order_acquire);
}

Expand All @@ -172,7 +172,7 @@ int __pthread_mutex_lock_pessimistic_np(pthread_mutex_t* mutex)
// because we know we don't. Used in the condition variable implementation.
u32 value = AK::atomic_exchange(&mutex->lock, MUTEX_LOCKED_NEED_TO_WAKE, AK::memory_order_acquire);
while (value != MUTEX_UNLOCKED) {
futex_wait(&mutex->lock, value, nullptr, 0);
futex_wait(&mutex->lock, value, nullptr, 0, false);
value = AK::atomic_exchange(&mutex->lock, MUTEX_LOCKED_NEED_TO_WAKE, AK::memory_order_acquire);
}

Expand All @@ -195,7 +195,7 @@ int pthread_mutex_unlock(pthread_mutex_t* mutex)

u32 value = AK::atomic_exchange(&mutex->lock, MUTEX_UNLOCKED, AK::memory_order_release);
if (value == MUTEX_LOCKED_NEED_TO_WAKE) [[unlikely]] {
int rc = futex_wake(&mutex->lock, 1);
int rc = futex_wake(&mutex->lock, 1, false);
VERIFY(rc >= 0);
}

Expand Down
Loading

0 comments on commit 9db1088

Please sign in to comment.