Skip to content

Commit

Permalink
LibC: Implement posix_memalign(3) and aligned_alloc(3)
Browse files Browse the repository at this point in the history
Some ports linked against posix_memalign, but didn't use it, and others
used it if it was available. So I decided to implement posix_memalign.

My implementation adds almost no overhead to regular mallocs. However,
if an alignment is specified, it will use the smallest ChunkedBlock, for
which aligned chunks exist, and simply use one of the chunks that is
aligned. If it cannot use a ChunkedBlock, for size or alignment reasons,
it will use a BigAllocationBlock, and return a pointer to the first
aligned address past the start of the block. This implementation
supports alignments up to 32768, due to the limitations of the
BigAllocationBlock technique.
  • Loading branch information
petelliott authored and awesomekling committed May 20, 2022
1 parent 830f5c6 commit 4e0adb6
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 60 deletions.
3 changes: 0 additions & 3 deletions Userland/Libraries/LibC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,6 @@ set(SOURCES ${LIBC_SOURCES} ${AK_SOURCES} ${ELF_SOURCES} ${ASM_SOURCES})
# Prevent GCC from removing null checks by marking the `FILE*` argument non-null
set_source_files_properties(stdio.cpp PROPERTIES COMPILE_FLAGS "-fno-builtin-fputc -fno-builtin-fputs -fno-builtin-fwrite")

# Add in the `posix_memalign` symbol to avoid breaking existing binaries.
set_source_files_properties(stdlib.cpp PROPERTIES COMPILE_FLAGS "-DSERENITY_LIBC_SHOW_POSIX_MEMALIGN")

# Prevent naively implemented string functions (like strlen) from being "optimized" into a call to themselves.
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set_source_files_properties(string.cpp wchar.cpp PROPERTIES COMPILE_FLAGS "-fno-tree-loop-distribution -fno-tree-loop-distribute-patterns")
Expand Down
212 changes: 171 additions & 41 deletions Userland/Libraries/LibC/malloc.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018-2021, Andreas Kling <[email protected]>
* Copyright (c) 2022, Peter Elliott <[email protected]>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
Expand Down Expand Up @@ -153,12 +154,75 @@ static inline BigAllocator (&big_allocators())[1]
return reinterpret_cast<BigAllocator(&)[1]>(g_big_allocators_storage);
}

static Allocator* allocator_for_size(size_t size, size_t& good_size)
// --- BEGIN MATH ---
// This stuff is only used for checking if there exists an aligned block in a
// chunk. It has no bearing on the rest of the allocator, especially for
// regular malloc.

// Mathematical (always-non-negative, for positive b) remainder of a divided by b,
// unlike C's % operator, which keeps the sign of a.
static inline unsigned long modulo(long a, long b)
{
    long const remainder = a % b;
    return (remainder + b) % b;
}

// Result of the extended Euclidean algorithm: Bezout coefficients x, y and the
// gcd, satisfying x*a + y*b == gcd(a, b).
struct EuclideanResult {
    long x;
    long y;
    long gcd;
};

// Extended Euclidean algorithm: returns {x, y, gcd} such that x*a + y*b == gcd(a, b).
static inline EuclideanResult extended_euclid(long a, long b)
{
    EuclideanResult previous { 1, 0, a };
    EuclideanResult state { 0, 1, b };

    // Invariant: previous.x*a + previous.y*b == previous.gcd (same for state).
    while (state.gcd != 0) {
        long quotient = previous.gcd / state.gcd;

        EuclideanResult advanced {
            previous.x - quotient * state.x,
            previous.y - quotient * state.y,
            previous.gcd - quotient * state.gcd,
        };

        previous = state;
        state = advanced;
    }

    return previous;
}

// Returns true if a ChunkedBlock with bytes_per_chunk-sized chunks contains at
// least one chunk whose address is align-aligned. Chunk n lives at offset
// sizeof(ChunkedBlock) + n*bytes_per_chunk from the block's base, so we solve
// for the index n of the first aligned chunk and check that it fits within the
// block's capacity.
static inline bool block_has_aligned_chunk(long align, long bytes_per_chunk, long chunk_capacity)
{
    // Never do math on a normal malloc. (Fast path: presumably every chunk is
    // already at least this aligned when align does not exceed the header size
    // — TODO confirm this holds for all size classes.)
    if ((size_t)align <= sizeof(ChunkedBlock))
        return true;

    // Solve the linear congruence n*bytes_per_chunk = -sizeof(ChunkedBlock) (mod align).
    // extended_euclid() yields x with x*(bytes_per_chunk % align) + y*align == gcd,
    // which is scaled below into a solution for n.
    auto [x, y, gcd] = extended_euclid(bytes_per_chunk % align, align);
    long constant = modulo(-sizeof(ChunkedBlock), align);
    if (constant % gcd != 0)
        // No solution. Chunk size is probably a multiple of align.
        return false;

    // Scale the Bezout coefficient into a solution of the congruence, reduced
    // into [0, align); for negative x, shift by the solution-period align/gcd.
    long n = modulo(x * (constant / gcd), align);
    if (x < 0)
        n = (n + align / gcd) % align;

    // Don't ask me to prove this.
    VERIFY(n > 0);
    // The aligned chunk only exists if its index lies inside this block.
    return n < chunk_capacity;
}

// --- END MATH ---

static Allocator* allocator_for_size(size_t size, size_t& good_size, size_t align = 1)
{
for (size_t i = 0; size_classes[i]; ++i) {
if (size <= size_classes[i]) {
auto& allocator = allocators()[i];
if (size <= size_classes[i] && block_has_aligned_chunk(align, allocator.size, (ChunkedBlock::block_size - sizeof(ChunkedBlock)) / allocator.size)) {
good_size = size_classes[i];
return &allocators()[i];
return &allocator;
}
}
good_size = PAGE_ROUND_UP(size);
Expand All @@ -176,7 +240,7 @@ static BigAllocator* big_allocator_for_size(size_t size)

extern "C" {

static void* os_alloc(size_t size, char const* name)
static ErrorOr<void*> os_alloc(size_t size, char const* name)
{
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_PURGEABLE;
#if ARCH(X86_64)
Expand All @@ -185,8 +249,7 @@ static void* os_alloc(size_t size, char const* name)
auto* ptr = serenity_mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, 0, 0, ChunkedBlock::block_size, name);
VERIFY(ptr != nullptr);
if (ptr == MAP_FAILED) {
errno = ENOMEM;
return nullptr;
return ENOMEM;
}
return ptr;
}
Expand All @@ -197,6 +260,31 @@ static void os_free(void* ptr, size_t size)
assert(rc == 0);
}

// Pops and returns the first align-aligned free chunk of the block, first
// searching the freelist, then the not-yet-materialized (lazy) chunks.
// Returns nullptr if the block has no aligned chunk left.
static void* try_allocate_chunk_aligned(size_t align, ChunkedBlock& block)
{
    uintptr_t const align_mask = align - 1;

    // Scan the freelist for an aligned chunk and unlink it on a hit. For a
    // standard-aligned malloc this matches on the very first entry.
    FreelistEntry** cursor = &block.m_freelist;
    while (*cursor != nullptr) {
        if ((reinterpret_cast<uintptr_t>(*cursor) & align_mask) == 0) {
            void* chunk = *cursor;
            *cursor = (*cursor)->next; // Unlink the matching entry.
            --block.m_free_chunks;
            return chunk;
        }
        cursor = &(*cursor)->next;
    }

    // Materialize lazy chunks: unaligned ones are pushed onto the freelist,
    // the first aligned one is returned.
    while (block.m_next_lazy_freelist_index < block.chunk_capacity()) {
        void* chunk = block.m_slot + block.m_next_lazy_freelist_index * block.m_size;
        block.m_next_lazy_freelist_index++;
        if ((reinterpret_cast<uintptr_t>(chunk) & align_mask) == 0) {
            --block.m_free_chunks;
            return chunk;
        }
        auto* entry = (FreelistEntry*)chunk;
        entry->next = block.m_freelist;
        block.m_freelist = entry;
    }

    return nullptr;
}

enum class CallerWillInitializeMemory {
No,
Yes,
Expand All @@ -208,12 +296,20 @@ enum class CallerWillInitializeMemory {
__thread bool s_allocation_enabled;
#endif

static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_initialize_memory)
static ErrorOr<void*> malloc_impl(size_t size, size_t align, CallerWillInitializeMemory caller_will_initialize_memory)
{
#ifndef NO_TLS
VERIFY(s_allocation_enabled);
#endif

// Align must be a power of 2.
if (popcount(align) != 1)
return EINVAL;

// FIXME: Support larger than 32KiB alignments (if you dare).
if (sizeof(BigAllocationBlock) + align >= ChunkedBlock::block_size)
return EINVAL;

if (s_log_malloc)
dbgln("LibC: malloc({})", size);

Expand All @@ -226,16 +322,15 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
g_malloc_stats.number_of_malloc_calls++;

size_t good_size;
auto* allocator = allocator_for_size(size, good_size);
auto* allocator = allocator_for_size(size, good_size, align);

PthreadMutexLocker locker(s_malloc_mutex);

if (!allocator) {
size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size, ChunkedBlock::block_size);
size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size + ((align > 16) ? align : 0), ChunkedBlock::block_size);
if (real_size < size) {
dbgln_if(MALLOC_DEBUG, "LibC: Detected overflow trying to do big allocation of size {} for {}", real_size, size);
errno = ENOMEM;
return nullptr;
return ENOMEM;
}
#ifdef RECYCLE_BIG_ALLOCATIONS
if (auto* allocator = big_allocator_for_size(real_size)) {
Expand All @@ -257,27 +352,31 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
new (block) BigAllocationBlock(real_size);
}

ue_notify_malloc(&block->m_slot[0], size);
return &block->m_slot[0];
void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align));

ue_notify_malloc(ptr, size);
return ptr;
}
}
#endif
auto* block = (BigAllocationBlock*)os_alloc(real_size, "malloc: BigAllocationBlock");
if (block == nullptr) {
dbgln_if(MALLOC_DEBUG, "LibC: Failed to do big allocation of size {} for {}", real_size, size);
return nullptr;
}
auto* block = (BigAllocationBlock*)TRY(os_alloc(real_size, "malloc: BigAllocationBlock"));
g_malloc_stats.number_of_big_allocs++;
new (block) BigAllocationBlock(real_size);
ue_notify_malloc(&block->m_slot[0], size);
return &block->m_slot[0];

void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align));
ue_notify_malloc(ptr, size);
return ptr;
}

ChunkedBlock* block = nullptr;
void* ptr = nullptr;
for (auto& current : allocator->usable_blocks) {
if (current.free_chunks()) {
block = &current;
break;
ptr = try_allocate_chunk_aligned(align, current);
if (ptr) {
block = &current;
break;
}
}
}

Expand Down Expand Up @@ -321,23 +420,16 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
g_malloc_stats.number_of_block_allocs++;
char buffer[64];
snprintf(buffer, sizeof(buffer), "malloc: ChunkedBlock(%zu)", good_size);
block = (ChunkedBlock*)os_alloc(ChunkedBlock::block_size, buffer);
if (block == nullptr) {
return nullptr;
}
block = (ChunkedBlock*)TRY(os_alloc(ChunkedBlock::block_size, buffer));
new (block) ChunkedBlock(good_size);
allocator->usable_blocks.append(*block);
++allocator->block_count;
}

--block->m_free_chunks;
void* ptr = block->m_freelist;
if (ptr) {
block->m_freelist = block->m_freelist->next;
} else {
ptr = block->m_slot + block->m_next_lazy_freelist_index * block->m_size;
block->m_next_lazy_freelist_index++;
if (!ptr) {
ptr = try_allocate_chunk_aligned(align, *block);
}

VERIFY(ptr);
if (block->is_full()) {
g_malloc_stats.number_of_blocks_full++;
Expand Down Expand Up @@ -451,14 +543,21 @@ static void free_impl(void* ptr)
void* malloc(size_t size)
{
MemoryAuditingSuppressor suppressor;
void* ptr = malloc_impl(size, CallerWillInitializeMemory::No);
auto ptr_or_error = malloc_impl(size, 16, CallerWillInitializeMemory::No);

if (ptr_or_error.is_error()) {
errno = ptr_or_error.error().code();
return nullptr;
}

if (s_profiling)
perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr));
return ptr;
perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr_or_error.value()));

return ptr_or_error.value();
}

// This is a Microsoft extension, and is not found on other Unix-like systems.
// FIXME: Implement aligned_alloc() instead
// FIXME: Remove this when all patches have been switched to aligned_alloc()
//
// This is used in libc++ to implement C++17 aligned new/delete.
//
Expand Down Expand Up @@ -513,10 +612,41 @@ void* calloc(size_t count, size_t size)
return nullptr;
}
size_t new_size = count * size;
auto* ptr = malloc_impl(new_size, CallerWillInitializeMemory::Yes);
if (ptr)
memset(ptr, 0, new_size);
return ptr;
auto ptr_or_error = malloc_impl(new_size, 16, CallerWillInitializeMemory::Yes);

if (ptr_or_error.is_error()) {
errno = ptr_or_error.error().code();
return nullptr;
}

memset(ptr_or_error.value(), 0, new_size);
return ptr_or_error.value();
}

// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
//
// On success stores the allocation in *memptr and returns 0; on failure
// returns an error number directly (errno is not set, per POSIX).
int posix_memalign(void** memptr, size_t alignment, size_t size)
{
    MemoryAuditingSuppressor suppressor;

    // POSIX requires alignment to be a power of two *multiple of
    // sizeof(void*)*. malloc_impl only rejects non-powers-of-two, so the
    // multiple-of-pointer-size half of the contract is enforced here.
    if (alignment % sizeof(void*) != 0)
        return EINVAL;

    auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No);
    if (ptr_or_error.is_error())
        return ptr_or_error.error().code();

    *memptr = ptr_or_error.value();
    return 0;
}

// C11 aligned_alloc(3): returns size bytes aligned to `alignment`, or nullptr
// with errno set on failure.
void* aligned_alloc(size_t alignment, size_t size)
{
    MemoryAuditingSuppressor suppressor;

    auto result = malloc_impl(size, alignment, CallerWillInitializeMemory::No);
    if (!result.is_error())
        return result.value();

    // Unlike posix_memalign(), this interface reports errors through errno.
    errno = result.error().code();
    return nullptr;
}

size_t malloc_size(void const* ptr)
Expand Down
11 changes: 0 additions & 11 deletions Userland/Libraries/LibC/stdlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1335,14 +1335,3 @@ void _Exit(int status)
{
_exit(status);
}

#ifdef SERENITY_LIBC_SHOW_POSIX_MEMALIGN
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html
int posix_memalign(void** memptr, size_t alignment, size_t size)
{
(void)memptr;
(void)alignment;
(void)size;
TODO();
}
#endif
6 changes: 1 addition & 5 deletions Userland/Libraries/LibC/stdlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,7 @@ int posix_openpt(int flags);
int grantpt(int fd);
int unlockpt(int fd);

// FIXME: Remove the ifdef once we have a working memalign implementation.
// This is hidden by default until then because many applications prefer
// `posix_memalign` over other implementations of aligned memory.
#ifdef SERENITY_LIBC_SHOW_POSIX_MEMALIGN
int posix_memalign(void**, size_t alignment, size_t size);
#endif
__attribute__((malloc, alloc_size(2), alloc_align(1))) void* aligned_alloc(size_t alignment, size_t size);

__END_DECLS

0 comments on commit 4e0adb6

Please sign in to comment.