Skip to content

Commit

Permalink
[hip] Re-land #19082 and #19074 (#19101)
Browse files Browse the repository at this point in the history
Fixes an issue with the async allocation/deallocation where we did not
issue an execution to kick off the worker thread. Depending on workload
ordering this sometimes could cause a deadlock.

---------

Signed-off-by: Andrew Woloszyn <[email protected]>
  • Loading branch information
AWoloszyn authored Nov 11, 2024
1 parent c0dff68 commit 915b06b
Show file tree
Hide file tree
Showing 11 changed files with 610 additions and 102 deletions.
2 changes: 2 additions & 0 deletions runtime/src/iree/hal/drivers/cuda/cuda_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,8 @@ static const iree_hal_deferred_work_queue_device_interface_vtable_t
iree_hal_cuda_deferred_work_queue_device_interface_create_stream_command_buffer,
.submit_command_buffer =
iree_hal_cuda_deferred_work_queue_device_interface_submit_command_buffer,
.async_alloc = NULL,
.async_dealloc = NULL,
};

static const iree_hal_stream_tracing_device_interface_vtable_t
Expand Down
1 change: 1 addition & 0 deletions runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocFromPoolAsync, void **, size_t,
hipMemPool_t, hipStream_t)
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocManaged, hipDeviceptr_t *, size_t,
unsigned int)
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocAsync, void **, size_t, hipStream_t)
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpy, void *, const void *, size_t,
hipMemcpyKind)
IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpyAsync, void *, const void *, size_t,
Expand Down
51 changes: 51 additions & 0 deletions runtime/src/iree/hal/drivers/hip/hip_allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ static void iree_hal_hip_allocator_destroy(
IREE_TRACE_ZONE_END(z0);
}

bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value) {
return iree_hal_resource_is(base_value, &iree_hal_hip_allocator_vtable);
}

static iree_allocator_t iree_hal_hip_allocator_host_allocator(
const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
iree_hal_hip_allocator_t* allocator =
Expand Down Expand Up @@ -590,6 +594,53 @@ static iree_status_t iree_hal_hip_allocator_export_buffer(
}
}

iree_status_t iree_hal_hip_allocator_alloc_async(
iree_hal_allocator_t* base_allocator, hipStream_t stream,
iree_hal_buffer_t* buffer) {
iree_hal_hip_allocator_t* allocator =
iree_hal_hip_allocator_cast(base_allocator);

hipDeviceptr_t ptr = NULL;
iree_status_t status = IREE_HIP_RESULT_TO_STATUS(
allocator->symbols,
hipMallocAsync(&ptr, (size_t)iree_hal_buffer_allocation_size(buffer),
stream),
"hipMallocAsync");
if (iree_status_is_ok(status)) {
iree_hal_hip_buffer_set_device_pointer(buffer, ptr);
IREE_TRACE_ALLOC_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)ptr,
iree_hal_buffer_allocation_size(buffer));
IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
&allocator->statistics, iree_hal_buffer_memory_type(buffer),
iree_hal_buffer_allocation_size(buffer)));
} else {
iree_hal_hip_buffer_set_allocation_empty(buffer);
}

return status;
}

iree_status_t iree_hal_hip_allocator_free_async(
iree_hal_allocator_t* base_allocator, hipStream_t stream,
iree_hal_buffer_t* buffer) {
iree_hal_hip_allocator_t* allocator =
iree_hal_hip_allocator_cast(base_allocator);
hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
if (!device_ptr) {
return iree_ok_status();
}

IREE_RETURN_IF_ERROR(IREE_HIP_RESULT_TO_STATUS(
allocator->symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync"));
iree_hal_hip_buffer_set_allocation_empty(buffer);

IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)device_ptr);
IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
&allocator->statistics, iree_hal_buffer_memory_type(buffer),
iree_hal_buffer_allocation_size(buffer)));
return iree_ok_status();
}

static const iree_hal_allocator_vtable_t iree_hal_hip_allocator_vtable = {
.destroy = iree_hal_hip_allocator_destroy,
.host_allocator = iree_hal_hip_allocator_host_allocator,
Expand Down
10 changes: 10 additions & 0 deletions runtime/src/iree/hal/drivers/hip/hip_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ iree_status_t iree_hal_hip_allocator_create(
hipStream_t stream, iree_hal_hip_memory_pools_t* pools,
iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator);

bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value);

iree_status_t iree_hal_hip_allocator_alloc_async(
iree_hal_allocator_t* base_allocator, hipStream_t stream,
iree_hal_buffer_t* buffer);

iree_status_t iree_hal_hip_allocator_free_async(iree_hal_allocator_t* allocator,
hipStream_t stream,
iree_hal_buffer_t* buffer);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
Expand Down
45 changes: 42 additions & 3 deletions runtime/src/iree/hal/drivers/hip/hip_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <string.h>

#include "iree/base/api.h"
#include "iree/base/internal/synchronization.h"
#include "iree/base/tracing.h"

typedef struct iree_hal_hip_buffer_t {
Expand All @@ -19,6 +20,9 @@ typedef struct iree_hal_hip_buffer_t {
void* host_ptr;
hipDeviceptr_t device_ptr;
iree_hal_buffer_release_callback_t release_callback;
iree_slim_mutex_t device_ptr_lock;
iree_notification_t device_ptr_notification;
bool empty;
} iree_hal_hip_buffer_t;

static const iree_hal_buffer_vtable_t iree_hal_hip_buffer_vtable;
Expand Down Expand Up @@ -65,13 +69,36 @@ iree_status_t iree_hal_hip_buffer_wrap(
buffer->host_ptr = host_ptr;
buffer->device_ptr = device_ptr;
buffer->release_callback = release_callback;
buffer->empty = false;
iree_slim_mutex_initialize(&buffer->device_ptr_lock);
iree_notification_initialize(&buffer->device_ptr_notification);
*out_buffer = &buffer->base;
}

IREE_TRACE_ZONE_END(z0);
return status;
}

void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* base_buffer,
hipDeviceptr_t pointer) {
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
IREE_ASSERT(buffer->device_ptr == NULL,
"Cannot set a device_ptr to a buffer that already has one");
iree_slim_mutex_lock(&buffer->device_ptr_lock);
buffer->device_ptr = pointer;
iree_slim_mutex_unlock(&buffer->device_ptr_lock);
iree_notification_post(&buffer->device_ptr_notification, IREE_ALL_WAITERS);
}

void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* base_buffer) {
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
iree_slim_mutex_lock(&buffer->device_ptr_lock);
buffer->empty = true;
buffer->device_ptr = NULL;
iree_slim_mutex_unlock(&buffer->device_ptr_lock);
iree_notification_post(&buffer->device_ptr_notification, IREE_ALL_WAITERS);
}

static void iree_hal_hip_buffer_destroy(iree_hal_buffer_t* base_buffer) {
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
iree_allocator_t host_allocator = base_buffer->host_allocator;
Expand All @@ -80,6 +107,8 @@ static void iree_hal_hip_buffer_destroy(iree_hal_buffer_t* base_buffer) {
buffer->release_callback.fn(buffer->release_callback.user_data,
base_buffer);
}
iree_slim_mutex_deinitialize(&buffer->device_ptr_lock);
iree_notification_deinitialize(&buffer->device_ptr_notification);
iree_allocator_free(host_allocator, buffer);
IREE_TRACE_ZONE_END(z0);
}
Expand Down Expand Up @@ -143,10 +172,20 @@ iree_hal_hip_buffer_type_t iree_hal_hip_buffer_type(
return buffer->type;
}

static bool iree_hal_hip_buffer_has_device_ptr(void* arg) {
iree_hal_hip_buffer_t* buffer = (iree_hal_hip_buffer_t*)arg;
iree_slim_mutex_lock(&buffer->device_ptr_lock);
bool has_ptr_or_error = buffer->device_ptr || buffer->empty;
iree_slim_mutex_unlock(&buffer->device_ptr_lock);
return has_ptr_or_error;
}

hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
const iree_hal_buffer_t* base_buffer) {
const iree_hal_hip_buffer_t* buffer =
iree_hal_hip_buffer_const_cast(base_buffer);
iree_hal_buffer_t* base_buffer) {
iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
iree_notification_await(&buffer->device_ptr_notification,
iree_hal_hip_buffer_has_device_ptr, buffer,
iree_infinite_timeout());
return buffer->device_ptr;
}

Expand Down
12 changes: 10 additions & 2 deletions runtime/src/iree/hal/drivers/hip/hip_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,16 @@ iree_hal_hip_buffer_type_t iree_hal_hip_buffer_type(
// Returns the HIP base pointer for the given |buffer|.
// This is the entire allocated_buffer and must be offset by the buffer
// byte_offset and byte_length when used.
hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
const iree_hal_buffer_t* buffer);
hipDeviceptr_t iree_hal_hip_buffer_device_pointer(iree_hal_buffer_t* buffer);

// Sets the HIP base pointer for the given |buffer|.
// This is the entire allocated_buffer and must be offset by the buffer
// byte_offset and byte_length when used.
void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* buffer,
hipDeviceptr_t pointer);

// Marks the buffer as having an intentionally empty allocation.
void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* buffer);

// Returns the HIP host pointer for the given |buffer|, if available.
void* iree_hal_hip_buffer_host_pointer(const iree_hal_buffer_t* buffer);
Expand Down
Loading

0 comments on commit 915b06b

Please sign in to comment.