forked from SerenityOS/serenity
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
LibC: Implement posix_memalign(3) and aligned_alloc(3)
Some ports linked against posix_memalign, but didn't use it, and others used it if it was Available. So I decided to implement posix_memalign. My implementation adds almost no overhead to regular mallocs. However, if an alignment is specified, it will use the smallest ChunkedBlock, for which aligned chunks exist, and simply use one of the chunks that is aligned. If it cannot use a ChunkedBlock, for size or alignment reasons, it will use a BigAllocationBlock, and return a pointer to the first aligned address past the start of the block. This implementation supports alignments up to 32768, due to the limitations of the BigAllocationBlock technique.
- Loading branch information
1 parent
830f5c6
commit 4e0adb6
Showing
4 changed files
with
172 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/* | ||
* Copyright (c) 2018-2021, Andreas Kling <[email protected]> | ||
* Copyright (c) 2022, Peter Elliott <[email protected]> | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause | ||
*/ | ||
|
@@ -153,12 +154,75 @@ static inline BigAllocator (&big_allocators())[1] | |
return reinterpret_cast<BigAllocator(&)[1]>(g_big_allocators_storage); | ||
} | ||
|
||
static Allocator* allocator_for_size(size_t size, size_t& good_size) | ||
// --- BEGIN MATH --- | ||
// This stuff is only used for checking if there exists an aligned block in a | ||
// chunk. It has no bearing on the rest of the allocator, especially for | ||
// regular malloc. | ||
|
||
static inline unsigned long modulo(long a, long b) | ||
{ | ||
return (b + (a % b)) % b; | ||
} | ||
|
||
struct EuclideanResult { | ||
long x; | ||
long y; | ||
long gcd; | ||
}; | ||
|
||
// Returns x, y, gcd. | ||
static inline EuclideanResult extended_euclid(long a, long b) | ||
{ | ||
EuclideanResult old = { 1, 0, a }; | ||
EuclideanResult current = { 0, 1, b }; | ||
|
||
while (current.gcd != 0) { | ||
long quotient = old.gcd / current.gcd; | ||
|
||
EuclideanResult next = { | ||
old.x - quotient * current.x, | ||
old.y - quotient * current.y, | ||
old.gcd - quotient * current.gcd, | ||
}; | ||
|
||
old = current; | ||
current = next; | ||
} | ||
|
||
return old; | ||
} | ||
|
||
static inline bool block_has_aligned_chunk(long align, long bytes_per_chunk, long chunk_capacity) | ||
{ | ||
// Never do math on a normal malloc. | ||
if ((size_t)align <= sizeof(ChunkedBlock)) | ||
return true; | ||
|
||
// Solve the linear congruence n*bytes_per_chunk = -sizeof(ChunkedBlock) (mod align). | ||
auto [x, y, gcd] = extended_euclid(bytes_per_chunk % align, align); | ||
long constant = modulo(-sizeof(ChunkedBlock), align); | ||
if (constant % gcd != 0) | ||
// No solution. Chunk size is probably a multiple of align. | ||
return false; | ||
|
||
long n = modulo(x * (constant / gcd), align); | ||
if (x < 0) | ||
n = (n + align / gcd) % align; | ||
|
||
// Don't ask me to prove this. | ||
VERIFY(n > 0); | ||
return n < chunk_capacity; | ||
} | ||
|
||
// --- END MATH --- | ||
|
||
static Allocator* allocator_for_size(size_t size, size_t& good_size, size_t align = 1) | ||
{ | ||
for (size_t i = 0; size_classes[i]; ++i) { | ||
if (size <= size_classes[i]) { | ||
auto& allocator = allocators()[i]; | ||
if (size <= size_classes[i] && block_has_aligned_chunk(align, allocator.size, (ChunkedBlock::block_size - sizeof(ChunkedBlock)) / allocator.size)) { | ||
good_size = size_classes[i]; | ||
return &allocators()[i]; | ||
return &allocator; | ||
} | ||
} | ||
good_size = PAGE_ROUND_UP(size); | ||
|
@@ -176,7 +240,7 @@ static BigAllocator* big_allocator_for_size(size_t size) | |
|
||
extern "C" { | ||
|
||
static void* os_alloc(size_t size, char const* name) | ||
static ErrorOr<void*> os_alloc(size_t size, char const* name) | ||
{ | ||
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_PURGEABLE; | ||
#if ARCH(X86_64) | ||
|
@@ -185,8 +249,7 @@ static void* os_alloc(size_t size, char const* name) | |
auto* ptr = serenity_mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, 0, 0, ChunkedBlock::block_size, name); | ||
VERIFY(ptr != nullptr); | ||
if (ptr == MAP_FAILED) { | ||
errno = ENOMEM; | ||
return nullptr; | ||
return ENOMEM; | ||
} | ||
return ptr; | ||
} | ||
|
@@ -197,6 +260,31 @@ static void os_free(void* ptr, size_t size) | |
assert(rc == 0); | ||
} | ||
|
||
static void* try_allocate_chunk_aligned(size_t align, ChunkedBlock& block) | ||
{ | ||
// These loops are guaranteed to run only once for a standard-aligned malloc. | ||
for (FreelistEntry** entry = &(block.m_freelist); *entry != nullptr; entry = &((*entry)->next)) { | ||
if ((reinterpret_cast<uintptr_t>(*entry) & (align - 1)) == 0) { | ||
--block.m_free_chunks; | ||
void* ptr = *entry; | ||
*entry = (*entry)->next; // Delete the entry. | ||
return ptr; | ||
} | ||
} | ||
for (; block.m_next_lazy_freelist_index < block.chunk_capacity(); block.m_next_lazy_freelist_index++) { | ||
void* ptr = block.m_slot + block.m_next_lazy_freelist_index * block.m_size; | ||
if ((reinterpret_cast<uintptr_t>(ptr) & (align - 1)) == 0) { | ||
--block.m_free_chunks; | ||
block.m_next_lazy_freelist_index++; | ||
return ptr; | ||
} | ||
auto* entry = (FreelistEntry*)ptr; | ||
entry->next = block.m_freelist; | ||
block.m_freelist = entry; | ||
} | ||
return nullptr; | ||
} | ||
|
||
enum class CallerWillInitializeMemory { | ||
No, | ||
Yes, | ||
|
@@ -208,12 +296,20 @@ enum class CallerWillInitializeMemory { | |
__thread bool s_allocation_enabled; | ||
#endif | ||
|
||
static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_initialize_memory) | ||
static ErrorOr<void*> malloc_impl(size_t size, size_t align, CallerWillInitializeMemory caller_will_initialize_memory) | ||
{ | ||
#ifndef NO_TLS | ||
VERIFY(s_allocation_enabled); | ||
#endif | ||
|
||
// Align must be a power of 2. | ||
if (popcount(align) != 1) | ||
return EINVAL; | ||
|
||
// FIXME: Support larger than 32KiB alignments (if you dare). | ||
if (sizeof(BigAllocationBlock) + align >= ChunkedBlock::block_size) | ||
return EINVAL; | ||
|
||
if (s_log_malloc) | ||
dbgln("LibC: malloc({})", size); | ||
|
||
|
@@ -226,16 +322,15 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini | |
g_malloc_stats.number_of_malloc_calls++; | ||
|
||
size_t good_size; | ||
auto* allocator = allocator_for_size(size, good_size); | ||
auto* allocator = allocator_for_size(size, good_size, align); | ||
|
||
PthreadMutexLocker locker(s_malloc_mutex); | ||
|
||
if (!allocator) { | ||
size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size, ChunkedBlock::block_size); | ||
size_t real_size = round_up_to_power_of_two(sizeof(BigAllocationBlock) + size + ((align > 16) ? align : 0), ChunkedBlock::block_size); | ||
if (real_size < size) { | ||
dbgln_if(MALLOC_DEBUG, "LibC: Detected overflow trying to do big allocation of size {} for {}", real_size, size); | ||
errno = ENOMEM; | ||
return nullptr; | ||
return ENOMEM; | ||
} | ||
#ifdef RECYCLE_BIG_ALLOCATIONS | ||
if (auto* allocator = big_allocator_for_size(real_size)) { | ||
|
@@ -257,27 +352,31 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini | |
new (block) BigAllocationBlock(real_size); | ||
} | ||
|
||
ue_notify_malloc(&block->m_slot[0], size); | ||
return &block->m_slot[0]; | ||
void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align)); | ||
|
||
ue_notify_malloc(ptr, size); | ||
return ptr; | ||
} | ||
} | ||
#endif | ||
auto* block = (BigAllocationBlock*)os_alloc(real_size, "malloc: BigAllocationBlock"); | ||
if (block == nullptr) { | ||
dbgln_if(MALLOC_DEBUG, "LibC: Failed to do big allocation of size {} for {}", real_size, size); | ||
return nullptr; | ||
} | ||
auto* block = (BigAllocationBlock*)TRY(os_alloc(real_size, "malloc: BigAllocationBlock")); | ||
g_malloc_stats.number_of_big_allocs++; | ||
new (block) BigAllocationBlock(real_size); | ||
ue_notify_malloc(&block->m_slot[0], size); | ||
return &block->m_slot[0]; | ||
|
||
void* ptr = reinterpret_cast<void*>(round_up_to_power_of_two(reinterpret_cast<uintptr_t>(&block->m_slot[0]), align)); | ||
ue_notify_malloc(ptr, size); | ||
return ptr; | ||
} | ||
|
||
ChunkedBlock* block = nullptr; | ||
void* ptr = nullptr; | ||
for (auto& current : allocator->usable_blocks) { | ||
if (current.free_chunks()) { | ||
block = ¤t; | ||
break; | ||
ptr = try_allocate_chunk_aligned(align, current); | ||
if (ptr) { | ||
block = ¤t; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
|
@@ -321,23 +420,16 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini | |
g_malloc_stats.number_of_block_allocs++; | ||
char buffer[64]; | ||
snprintf(buffer, sizeof(buffer), "malloc: ChunkedBlock(%zu)", good_size); | ||
block = (ChunkedBlock*)os_alloc(ChunkedBlock::block_size, buffer); | ||
if (block == nullptr) { | ||
return nullptr; | ||
} | ||
block = (ChunkedBlock*)TRY(os_alloc(ChunkedBlock::block_size, buffer)); | ||
new (block) ChunkedBlock(good_size); | ||
allocator->usable_blocks.append(*block); | ||
++allocator->block_count; | ||
} | ||
|
||
--block->m_free_chunks; | ||
void* ptr = block->m_freelist; | ||
if (ptr) { | ||
block->m_freelist = block->m_freelist->next; | ||
} else { | ||
ptr = block->m_slot + block->m_next_lazy_freelist_index * block->m_size; | ||
block->m_next_lazy_freelist_index++; | ||
if (!ptr) { | ||
ptr = try_allocate_chunk_aligned(align, *block); | ||
} | ||
|
||
VERIFY(ptr); | ||
if (block->is_full()) { | ||
g_malloc_stats.number_of_blocks_full++; | ||
|
@@ -451,14 +543,21 @@ static void free_impl(void* ptr) | |
void* malloc(size_t size) | ||
{ | ||
MemoryAuditingSuppressor suppressor; | ||
void* ptr = malloc_impl(size, CallerWillInitializeMemory::No); | ||
auto ptr_or_error = malloc_impl(size, 16, CallerWillInitializeMemory::No); | ||
|
||
if (ptr_or_error.is_error()) { | ||
errno = ptr_or_error.error().code(); | ||
return nullptr; | ||
} | ||
|
||
if (s_profiling) | ||
perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr)); | ||
return ptr; | ||
perf_event(PERF_EVENT_MALLOC, size, reinterpret_cast<FlatPtr>(ptr_or_error.value())); | ||
|
||
return ptr_or_error.value(); | ||
} | ||
|
||
// This is a Microsoft extension, and is not found on other Unix-like systems. | ||
// FIXME: Implement aligned_alloc() instead | ||
// FIXME: Remove this when all patches have been switched to aligned_alloc() | ||
// | ||
// This is used in libc++ to implement C++17 aligned new/delete. | ||
// | ||
|
@@ -513,10 +612,41 @@ void* calloc(size_t count, size_t size) | |
return nullptr; | ||
} | ||
size_t new_size = count * size; | ||
auto* ptr = malloc_impl(new_size, CallerWillInitializeMemory::Yes); | ||
if (ptr) | ||
memset(ptr, 0, new_size); | ||
return ptr; | ||
auto ptr_or_error = malloc_impl(new_size, 16, CallerWillInitializeMemory::Yes); | ||
|
||
if (ptr_or_error.is_error()) { | ||
errno = ptr_or_error.error().code(); | ||
return nullptr; | ||
} | ||
|
||
memset(ptr_or_error.value(), 0, new_size); | ||
return ptr_or_error.value(); | ||
} | ||
|
||
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html | ||
int posix_memalign(void** memptr, size_t alignment, size_t size) | ||
{ | ||
MemoryAuditingSuppressor suppressor; | ||
auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No); | ||
|
||
if (ptr_or_error.is_error()) | ||
return ptr_or_error.error().code(); | ||
|
||
*memptr = ptr_or_error.value(); | ||
return 0; | ||
} | ||
|
||
void* aligned_alloc(size_t alignment, size_t size) | ||
{ | ||
MemoryAuditingSuppressor suppressor; | ||
auto ptr_or_error = malloc_impl(size, alignment, CallerWillInitializeMemory::No); | ||
|
||
if (ptr_or_error.is_error()) { | ||
errno = ptr_or_error.error().code(); | ||
return nullptr; | ||
} | ||
|
||
return ptr_or_error.value(); | ||
} | ||
|
||
size_t malloc_size(void const* ptr) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters