diff --git a/Userland/Libraries/LibC/CMakeLists.txt b/Userland/Libraries/LibC/CMakeLists.txt index 9485cc602dd663..2f8a603cb6f668 100644 --- a/Userland/Libraries/LibC/CMakeLists.txt +++ b/Userland/Libraries/LibC/CMakeLists.txt @@ -139,9 +139,6 @@ set(SOURCES ${LIBC_SOURCES} ${AK_SOURCES} ${ELF_SOURCES} ${ASM_SOURCES}) # Prevent GCC from removing null checks by marking the `FILE*` argument non-null set_source_files_properties(stdio.cpp PROPERTIES COMPILE_FLAGS "-fno-builtin-fputc -fno-builtin-fputs -fno-builtin-fwrite") -# Add in the `posix_memalign` symbol to avoid breaking existing binaries. -set_source_files_properties(stdlib.cpp PROPERTIES COMPILE_FLAGS "-DSERENITY_LIBC_SHOW_POSIX_MEMALIGN") - # Prevent naively implemented string functions (like strlen) from being "optimized" into a call to themselves. if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set_source_files_properties(string.cpp wchar.cpp PROPERTIES COMPILE_FLAGS "-fno-tree-loop-distribution -fno-tree-loop-distribute-patterns") diff --git a/Userland/Libraries/LibC/malloc.cpp b/Userland/Libraries/LibC/malloc.cpp index 5b3e7add2ba072..05381069dcff72 100644 --- a/Userland/Libraries/LibC/malloc.cpp +++ b/Userland/Libraries/LibC/malloc.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2018-2021, Andreas Kling + * Copyright (c) 2022, Peter Elliott * * SPDX-License-Identifier: BSD-2-Clause */ @@ -153,12 +154,75 @@ static inline BigAllocator (&big_allocators())[1] return reinterpret_cast(g_big_allocators_storage); } -static Allocator* allocator_for_size(size_t size, size_t& good_size) +// --- BEGIN MATH --- +// This stuff is only used for checking if there exists an aligned block in a +// chunk. It has no bearing on the rest of the allocator, especially for +// regular malloc. + +static inline unsigned long modulo(long a, long b) +{ + return (b + (a % b)) % b; +} + +struct EuclideanResult { + long x; + long y; + long gcd; +}; + +// Returns x, y, gcd. +static inline EuclideanResult extended_euclid(long a, long b) +{ + EuclideanResult old = { 1, 0, a }; + EuclideanResult current = { 0, 1, b }; + + while (current.gcd != 0) { + long quotient = old.gcd / current.gcd; + + EuclideanResult next = { + old.x - quotient * current.x, + old.y - quotient * current.y, + old.gcd - quotient * current.gcd, + }; + + old = current; + current = next; + } + + return old; +} + +static inline bool block_has_aligned_chunk(long align, long bytes_per_chunk, long chunk_capacity) +{ + // Never do math on a normal malloc. + if ((size_t)align <= sizeof(ChunkedBlock)) + return true; + + // Solve the linear congruence n*bytes_per_chunk = -sizeof(ChunkedBlock) (mod align). + auto [x, y, gcd] = extended_euclid(bytes_per_chunk % align, align); + long constant = modulo(-sizeof(ChunkedBlock), align); + if (constant % gcd != 0) + // No solution. Chunk size is probably a multiple of align. + return false; + + long n = modulo(x * (constant / gcd), align); + if (x < 0) + n = (n + align / gcd) % align; + + // Don't ask me to prove this. + VERIFY(n > 0); + return n < chunk_capacity; +} + +// --- END MATH --- + +static Allocator* allocator_for_size(size_t size, size_t& good_size, size_t align = 1) { for (size_t i = 0; size_classes[i]; ++i) { - if (size <= size_classes[i]) { + auto& allocator = allocators()[i]; + if (size <= size_classes[i] && block_has_aligned_chunk(align, allocator.size, (ChunkedBlock::block_size - sizeof(ChunkedBlock)) / allocator.size)) { good_size = size_classes[i]; - return &allocators()[i]; + return &allocator; } } good_size = PAGE_ROUND_UP(size); @@ -176,7 +240,7 @@ static BigAllocator* big_allocator_for_size(size_t size) extern "C" { -static void* os_alloc(size_t size, char const* name) +static ErrorOr os_alloc(size_t size, char const* name) { int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_PURGEABLE; #if ARCH(X86_64) @@ -185,8 +249,7 @@ static void* os_alloc(size_t size, char const* name) auto* ptr = serenity_mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, 0, 0, ChunkedBlock::block_size, name); VERIFY(ptr != nullptr); if (ptr == MAP_FAILED) { - errno = ENOMEM; - return nullptr; + return ENOMEM; } return ptr; } @@ -197,6 +260,31 @@ static void os_free(void* ptr, size_t size) assert(rc == 0); } +static void* try_allocate_chunk_aligned(size_t align, ChunkedBlock& block) +{ + // These loops are guaranteed to run only once for a standard-aligned malloc. + for (FreelistEntry** entry = &(block.m_freelist); *entry != nullptr; entry = &((*entry)->next)) { + if ((reinterpret_cast(*entry) & (align - 1)) == 0) { + --block.m_free_chunks; + void* ptr = *entry; + *entry = (*entry)->next; // Delete the entry. + return ptr; + } + } + for (; block.m_next_lazy_freelist_index < block.chunk_capacity(); block.m_next_lazy_freelist_index++) { + void* ptr = block.m_slot + block.m_next_lazy_freelist_index * block.m_size; + if ((reinterpret_cast(ptr) & (align - 1)) == 0) { + --block.m_free_chunks; + block.m_next_lazy_freelist_index++; + return ptr; + } + auto* entry = (FreelistEntry*)ptr; + entry->next = block.m_freelist; + block.m_freelist = entry; + } + return nullptr; +} + enum class CallerWillInitializeMemory { No, Yes, @@ -208,12 +296,20 @@ enum class CallerWillInitializeMemory { __thread bool s_allocation_enabled; #endif -static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_initialize_memory) +static ErrorOr malloc_impl(size_t size, size_t align, CallerWillInitializeMemory caller_will_initialize_memory) { #ifndef NO_TLS VERIFY(s_allocation_enabled); #endif + // Align must be a power of 2. + if (popcount(align) != 1) + return EINVAL; + + // FIXME: Support larger than 32KiB alignments (if you dare). + if (sizeof(BigAllocationBlock) + align >= ChunkedBlock::block_size) + return EINVAL; + if (s_log_malloc) dbgln("LibC: malloc({})", size); @@ -226,16 +322,15 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini g_malloc_stats.number_of_malloc_calls++; size_t good_size; - auto* allocator = allocator_for_size(size, good_size); + auto* allocator = allocator_for_size(size, good_size, align); PthreadMutexLocker locker(s_malloc_mutex); if (!allocator) { - size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size, ChunkedBlock::block_size); + size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size + ((align > 16) ? align : 0), ChunkedBlock::block_size); if (real_size < size) { dbgln_if(MALLOC_DEBUG, "LibC: Detected overflow trying to do big allocation of size {} for {}", real_size, size); - errno = ENOMEM; - return nullptr; + return ENOMEM; } #ifdef RECYCLE_BIG_ALLOCATIONS if (auto* allocator = big_allocator_for_size(real_size)) { @@ -257,27 +352,31 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini new (block) BigAllocationBlock(real_size); } - ue_notify_malloc(&block->m_slot[0], size); - return &block->m_slot[0]; + void* ptr = reinterpret_cast(round_up_to_power_of_two(reinterpret_cast(&block->m_slot[0]), align)); + + ue_notify_malloc(ptr, size); + return ptr; } } #endif - auto* block = (BigAllocationBlock*)os_alloc(real_size, "malloc: BigAllocationBlock"); - if (block == nullptr) { - dbgln_if(MALLOC_DEBUG, "LibC: Failed to do big allocation of size {} for {}", real_size, size); - return nullptr; - } + auto* block = (BigAllocationBlock*)TRY(os_alloc(real_size, "malloc: BigAllocationBlock")); g_malloc_stats.number_of_big_allocs++; new (block) BigAllocationBlock(real_size); - ue_notify_malloc(&block->m_slot[0], size); - return &block->m_slot[0]; + + void* ptr = reinterpret_cast(round_up_to_power_of_two(reinterpret_cast(&block->m_slot[0]), align)); + ue_notify_malloc(ptr, size); + return ptr; } ChunkedBlock* block = nullptr; + void* ptr = nullptr; for (auto& current : allocator->usable_blocks) { if (current.free_chunks()) { - block = ¤t; - break; + ptr = try_allocate_chunk_aligned(align, current); + if (ptr) { + block = ¤t; + break; + } } } @@ -321,23 +420,16 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini g_malloc_stats.number_of_block_allocs++; char buffer[64]; snprintf(buffer, sizeof(buffer), "malloc: ChunkedBlock(%zu)", good_size); - block = (ChunkedBlock*)os_alloc(ChunkedBlock::block_size, buffer); - if (block == nullptr) { - return nullptr; - } + block = (ChunkedBlock*)TRY(os_alloc(ChunkedBlock::block_size, buffer)); new (block) ChunkedBlock(good_size); allocator->usable_blocks.append(*block); ++allocator->block_count; } - --block->m_free_chunks; - void* ptr = block->m_freelist; - if (ptr) { - block->m_freelist = block->m_freelist->next; - } else { - ptr = block->m_slot + block->m_next_lazy_freelist_index * block->m_size; - block->m_next_lazy_freelist_index++; + if (!ptr) { + ptr = try_allocate_chunk_aligned(align, *block); } + VERIFY(ptr); if (block->is_full()) { g_malloc_stats.number_of_blocks_full++; @@ -451,14 +543,21 @@ static void free_impl(void* ptr) void* malloc(size_t size) { MemoryAuditingSuppressor suppressor; - void* ptr = malloc_impl(size, CallerWillInitializeMemory::No); + auto ptr_or_error = malloc_impl(size, 16, CallerWillInitializeMemory::No); + + if (ptr_or_error.is_error()) { + errno = ptr_or_error.error().code(); + return nullptr; + } + if (s_profiling) - perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast(ptr)); - return ptr; + perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast(ptr_or_error.value())); + + return ptr_or_error.value(); } // This is a Microsoft extension, and is not found on other Unix-like systems. -// FIXME: Implement aligned_alloc() instead +// FIXME: Remove this when all patches have been switched to aligned_alloc() // // This is used in libc++ to implement C++17 aligned new/delete. // @@ -513,10 +612,41 @@ void* calloc(size_t count, size_t size) return nullptr; } size_t new_size = count * size; - auto* ptr = malloc_impl(new_size, CallerWillInitializeMemory::Yes); - if (ptr) - memset(ptr, 0, new_size); - return ptr; + auto ptr_or_error = malloc_impl(new_size, 16, CallerWillInitializeMemory::Yes); + + if (ptr_or_error.is_error()) { + errno = ptr_or_error.error().code(); + return nullptr; + } + + memset(ptr_or_error.value(), 0, new_size); + return ptr_or_error.value(); +} + +// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html +int posix_memalign(void** memptr, size_t alignment, size_t size) +{ + MemoryAuditingSuppressor suppressor; + auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No); + + if (ptr_or_error.is_error()) + return ptr_or_error.error().code(); + + *memptr = ptr_or_error.value(); + return 0; +} + +void* aligned_alloc(size_t alignment, size_t size) +{ + MemoryAuditingSuppressor suppressor; + auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No); + + if (ptr_or_error.is_error()) { + errno = ptr_or_error.error().code(); + return nullptr; + } + + return ptr_or_error.value(); } size_t malloc_size(void const* ptr) diff --git a/Userland/Libraries/LibC/stdlib.cpp b/Userland/Libraries/LibC/stdlib.cpp index bd21c87fa7c808..695c9c35e813ae 100644 --- a/Userland/Libraries/LibC/stdlib.cpp +++ b/Userland/Libraries/LibC/stdlib.cpp @@ -1335,14 +1335,3 @@ void _Exit(int status) { _exit(status); } - -#ifdef SERENITY_LIBC_SHOW_POSIX_MEMALIGN -// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html -int posix_memalign(void** memptr, size_t alignment, size_t size) -{ - (void)memptr; - (void)alignment; - (void)size; - TODO(); -} -#endif diff --git a/Userland/Libraries/LibC/stdlib.h b/Userland/Libraries/LibC/stdlib.h index 2b38dc6590b0d9..bdf9a562523870 100644 --- a/Userland/Libraries/LibC/stdlib.h +++ b/Userland/Libraries/LibC/stdlib.h @@ -101,11 +101,7 @@ int posix_openpt(int flags); int grantpt(int fd); int unlockpt(int fd); -// FIXME: Remove the ifdef once we have a working memalign implementation. -// This is hidden by default until then because many applications prefer -// `posix_memalign` over other implementations of aligned memory. -#ifdef SERENITY_LIBC_SHOW_POSIX_MEMALIGN int posix_memalign(void**, size_t alignment, size_t size); -#endif +__attribute__((malloc, alloc_size(2), alloc_align(1))) void* aligned_alloc(size_t alignment, size_t size); __END_DECLS