Skip to content

Commit

Permalink
Adding fill/update/copy HAL ops. (#19026)
Browse files Browse the repository at this point in the history
These ops use the newer style of 64-bit flags. TODOs were added to
hal.imports.mlir for future cleanup to existing ops whenever we want to
bump the version.
  • Loading branch information
benvanik authored Nov 5, 2024
1 parent c2a5478 commit 9650bfe
Show file tree
Hide file tree
Showing 10 changed files with 605 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,50 @@ class DeviceQueryI64OpConversion
mutable IREE::VM::ImportOp importOp;
};

/// Rewrites `hal.device.queue.fill` into a `vm.call` of the VM import that was
/// looked up by name at pattern construction time.
///
/// The call receives ten operands: the eight operands of the HAL op (with the
/// queue affinity, target offset, length and pattern routed through
/// `castToImportType` to i64) followed by two constants materialized here:
/// the pattern length in bytes (i32) and the op's flags attribute (i64).
class DeviceQueueFillOpConversion
    : public OpConversionPattern<IREE::HAL::DeviceQueueFillOp> {
public:
  /// Resolves `importName` in `importSymbols` and caches the import op.
  /// `typeConverter` is accepted but not referenced by this constructor.
  DeviceQueueFillOpConversion(MLIRContext *context, SymbolTable &importSymbols,
                              TypeConverter &typeConverter,
                              StringRef importName)
      : OpConversionPattern(context),
        importOp(importSymbols.lookup<IREE::VM::ImportOp>(importName)) {
    // The import must have been declared before this pattern is created.
    assert(importOp);
  }

  LogicalResult
  matchAndRewrite(IREE::HAL::DeviceQueueFillOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op.getLoc();
    auto importFnType = importOp.getFunctionType();
    auto i64Type = rewriter.getI64Type();

    // Pattern size in whole bytes: the bit width of the pattern type rounded
    // up to the next multiple of 8.
    auto patternBitWidth = op.getPattern().getType().getIntOrFloatBitWidth();
    auto patternLength = rewriter.create<IREE::VM::ConstI32Op>(
        loc, llvm::divideCeil(patternBitWidth, 8));

    // Flags are an attribute on the HAL op and become an i64 constant operand.
    auto flags = rewriter.create<IREE::VM::ConstI64Op>(loc, op.getFlags());

    // Casts are emitted in the same order as the operands they feed.
    auto queueAffinity =
        castToImportType(adaptor.getQueueAffinity(), i64Type, rewriter);
    auto targetOffset =
        castToImportType(adaptor.getTargetOffset(), i64Type, rewriter);
    auto fillLength = castToImportType(adaptor.getLength(), i64Type, rewriter);
    auto patternValue =
        castToImportType(adaptor.getPattern(), i64Type, rewriter);

    std::array<Value, 10> callOperands = {
        adaptor.getDevice(),
        queueAffinity,
        adaptor.getWaitFence(),
        adaptor.getSignalFence(),
        adaptor.getTargetBuffer(),
        targetOffset,
        fillLength,
        patternValue,
        patternLength,
        flags,
    };

    // Replace the HAL op with the import call and carry over the import's
    // attributes onto the call.
    auto callOp = rewriter.replaceOpWithNewOp<IREE::VM::CallOp>(
        op, SymbolRefAttr::get(importOp), importFnType.getResults(),
        callOperands);
    copyImportAttrs(importOp, callOp);
    return success();
  }

private:
  // Cached import declaration; mutable so the const matchAndRewrite can use
  // its non-const accessors.
  mutable IREE::VM::ImportOp importOp;
};

class DeviceQueueExecuteIndirectOpConversion
: public OpConversionPattern<IREE::HAL::DeviceQueueExecuteIndirectOp> {
public:
Expand Down Expand Up @@ -185,6 +229,12 @@ void populateHALDeviceToVMPatterns(MLIRContext *context,
context, importSymbols, typeConverter, "hal.device.queue.alloca");
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueDeallocaOp>>(
context, importSymbols, typeConverter, "hal.device.queue.dealloca");
patterns.insert<DeviceQueueFillOpConversion>(
context, importSymbols, typeConverter, "hal.device.queue.fill");
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueUpdateOp>>(
context, importSymbols, typeConverter, "hal.device.queue.update");
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueCopyOp>>(
context, importSymbols, typeConverter, "hal.device.queue.copy");
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueReadOp>>(
context, importSymbols, typeConverter, "hal.device.queue.read");
patterns.insert<VMImportOpConversion<IREE::HAL::DeviceQueueWriteOp>>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,148 @@ util.func public @device_queue_dealloca(

// -----

// Verifies the lowering of hal.device.queue.fill with an i8 pattern. The
// pattern arrives as an i32 function argument, is extended to i64 with
// vm.ext.i32.i64.s, and is passed to the import call together with an i32
// pattern length of 1 and a zero i64 flags constant. Expected call operand
// order: device, affinity, wait fence, signal fence, target buffer, target
// offset, length, pattern, pattern length, flags.
// CHECK-LABEL: @device_queue_fill_i8
util.func public @device_queue_fill_i8(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
%wait_fence: !hal.fence, %signal_fence: !hal.fence,
// CHECK-SAME: %[[PATTERN_I8_I32:.+]]: i32,
%pattern_i8: i8,
// CHECK-SAME: %[[TARGET_BUFFER:.+]]: !vm.ref<!hal.buffer>)
%target_buffer: !hal.buffer) {
// CHECK-DAG: %[[TARGET_OFFSET:.+]] = vm.const.i64 200
%target_offset = arith.constant 200 : index
// CHECK-DAG: %[[LENGTH:.+]] = vm.const.i64 300
%length = arith.constant 300 : index
// CHECK-DAG: %[[PATTERN_LENGTH:.+]] = vm.const.i32 1
// CHECK-DAG: %[[FLAGS:.+]] = vm.const.i64.zero
// CHECK-DAG: %[[PATTERN_I8_I64:.+]] = vm.ext.i32.i64.s %[[PATTERN_I8_I32]]
// CHECK: vm.call @hal.device.queue.fill(
// CHECK-SAME: %[[DEVICE]], %[[AFFINITY]],
// CHECK-SAME: %[[WAIT_FENCE]], %[[SIGNAL_FENCE]],
// CHECK-SAME: %[[TARGET_BUFFER]], %[[TARGET_OFFSET]],
// CHECK-SAME: %[[LENGTH]],
// CHECK-SAME: %[[PATTERN_I8_I64]], %[[PATTERN_LENGTH]],
// CHECK-SAME: %[[FLAGS]])
hal.device.queue.fill<%device : !hal.device>
affinity(%affinity)
wait(%wait_fence) signal(%signal_fence)
target(%target_buffer : !hal.buffer)[%target_offset]
length(%length)
pattern(%pattern_i8 : i8)
flags(0)
util.return
}

// -----

// Verifies the lowering of hal.device.queue.fill with an i32 pattern. The
// pattern is extended to i64 with vm.ext.i32.i64.s and passed to the import
// call together with an i32 pattern length of 4 and a zero i64 flags
// constant. Expected call operand order: device, affinity, wait fence, signal
// fence, target buffer, target offset, length, pattern, pattern length, flags.
// CHECK-LABEL: @device_queue_fill_i32
util.func public @device_queue_fill_i32(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
%wait_fence: !hal.fence, %signal_fence: !hal.fence,
// CHECK-SAME: %[[PATTERN_I32:.+]]: i32,
%pattern_i32: i32,
// CHECK-SAME: %[[TARGET_BUFFER:.+]]: !vm.ref<!hal.buffer>)
%target_buffer: !hal.buffer) {
// CHECK-DAG: %[[TARGET_OFFSET:.+]] = vm.const.i64 200
%target_offset = arith.constant 200 : index
// CHECK-DAG: %[[LENGTH:.+]] = vm.const.i64 300
%length = arith.constant 300 : index
// CHECK-DAG: %[[PATTERN_LENGTH:.+]] = vm.const.i32 4
// CHECK-DAG: %[[FLAGS:.+]] = vm.const.i64.zero
// CHECK-DAG: %[[PATTERN_I32_I64:.+]] = vm.ext.i32.i64.s %[[PATTERN_I32]]
// CHECK: vm.call @hal.device.queue.fill(
// CHECK-SAME: %[[DEVICE]], %[[AFFINITY]],
// CHECK-SAME: %[[WAIT_FENCE]], %[[SIGNAL_FENCE]],
// CHECK-SAME: %[[TARGET_BUFFER]], %[[TARGET_OFFSET]],
// CHECK-SAME: %[[LENGTH]],
// CHECK-SAME: %[[PATTERN_I32_I64]], %[[PATTERN_LENGTH]],
// CHECK-SAME: %[[FLAGS]])
hal.device.queue.fill<%device : !hal.device>
affinity(%affinity)
wait(%wait_fence) signal(%signal_fence)
target(%target_buffer : !hal.buffer)[%target_offset]
length(%length)
pattern(%pattern_i32 : i32)
flags(0)
util.return
}

// -----

// Verifies the lowering of hal.device.queue.update. The !util.buffer source
// becomes a !vm.buffer argument, the index offsets and length become i64
// constants, and the flags attribute becomes a zero i64 constant. Expected
// call operand order: device, affinity, wait fence, signal fence, source
// buffer, source offset, target buffer, target offset, length, flags.
// CHECK-LABEL: @device_queue_update
util.func public @device_queue_update(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
%wait_fence: !hal.fence, %signal_fence: !hal.fence,
// CHECK-SAME: %[[SOURCE_BUFFER:.+]]: !vm.buffer,
%source_buffer: !util.buffer,
// CHECK-SAME: %[[TARGET_BUFFER:.+]]: !vm.ref<!hal.buffer>)
%target_buffer: !hal.buffer) {
// CHECK-DAG: %[[SOURCE_OFFSET:.+]] = vm.const.i64 100
%source_offset = arith.constant 100 : index
// CHECK-DAG: %[[TARGET_OFFSET:.+]] = vm.const.i64 200
%target_offset = arith.constant 200 : index
// CHECK-DAG: %[[LENGTH:.+]] = vm.const.i64 300
%length = arith.constant 300 : index
// CHECK-DAG: %[[FLAGS:.+]] = vm.const.i64.zero
// CHECK: vm.call @hal.device.queue.update(
// CHECK-SAME: %[[DEVICE]], %[[AFFINITY]],
// CHECK-SAME: %[[WAIT_FENCE]], %[[SIGNAL_FENCE]],
// CHECK-SAME: %[[SOURCE_BUFFER]], %[[SOURCE_OFFSET]],
// CHECK-SAME: %[[TARGET_BUFFER]], %[[TARGET_OFFSET]],
// CHECK-SAME: %[[LENGTH]], %[[FLAGS]])
hal.device.queue.update<%device : !hal.device>
affinity(%affinity)
wait(%wait_fence) signal(%signal_fence)
source(%source_buffer : !util.buffer)[%source_offset]
target(%target_buffer : !hal.buffer)[%target_offset]
length(%length)
flags(0)
util.return
}

// -----

// Verifies the lowering of hal.device.queue.copy. Both buffers are
// !vm.ref<!hal.buffer> arguments, the index offsets and length become i64
// constants, and the flags attribute becomes a zero i64 constant. Expected
// call operand order: device, affinity, wait fence, signal fence, source
// buffer, source offset, target buffer, target offset, length, flags.
// CHECK-LABEL: @device_queue_copy
util.func public @device_queue_copy(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
%device: !hal.device, %affinity: i64,
// CHECK-SAME: %[[WAIT_FENCE:.+]]: !vm.ref<!hal.fence>, %[[SIGNAL_FENCE:.+]]: !vm.ref<!hal.fence>,
%wait_fence: !hal.fence, %signal_fence: !hal.fence,
// CHECK-SAME: %[[SOURCE_BUFFER:.+]]: !vm.ref<!hal.buffer>,
%source_buffer: !hal.buffer,
// CHECK-SAME: %[[TARGET_BUFFER:.+]]: !vm.ref<!hal.buffer>)
%target_buffer: !hal.buffer) {
// CHECK-DAG: %[[SOURCE_OFFSET:.+]] = vm.const.i64 100
%source_offset = arith.constant 100 : index
// CHECK-DAG: %[[TARGET_OFFSET:.+]] = vm.const.i64 200
%target_offset = arith.constant 200 : index
// CHECK-DAG: %[[LENGTH:.+]] = vm.const.i64 300
%length = arith.constant 300 : index
// CHECK-DAG: %[[FLAGS:.+]] = vm.const.i64.zero
// CHECK: vm.call @hal.device.queue.copy(
// CHECK-SAME: %[[DEVICE]], %[[AFFINITY]],
// CHECK-SAME: %[[WAIT_FENCE]], %[[SIGNAL_FENCE]],
// CHECK-SAME: %[[SOURCE_BUFFER]], %[[SOURCE_OFFSET]],
// CHECK-SAME: %[[TARGET_BUFFER]], %[[TARGET_OFFSET]],
// CHECK-SAME: %[[LENGTH]], %[[FLAGS]])
hal.device.queue.copy<%device : !hal.device>
affinity(%affinity)
wait(%wait_fence) signal(%signal_fence)
source(%source_buffer : !hal.buffer)[%source_offset]
target(%target_buffer : !hal.buffer)[%target_offset]
length(%length)
flags(0)
util.return
}

// -----

// CHECK-LABEL: @device_queue_read
util.func public @device_queue_read(
// CHECK-SAME: (%[[DEVICE:.+]]: !vm.ref<!hal.device>, %[[AFFINITY:.+]]: i64,
Expand Down
12 changes: 12 additions & 0 deletions compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1319,6 +1319,18 @@ LogicalResult DeviceQueueDeallocaOp::verify() {
return verifyDeviceQueueFences(*this, getWaitFence(), getSignalFence());
}

// Verifies hal.device.queue.fill by delegating to the shared queue fence
// check with this op's wait and signal fences.
LogicalResult DeviceQueueFillOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}

// Verifies hal.device.queue.update by delegating to the shared queue fence
// check with this op's wait and signal fences.
LogicalResult DeviceQueueUpdateOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}

// Verifies hal.device.queue.copy by delegating to the shared queue fence
// check with this op's wait and signal fences.
LogicalResult DeviceQueueCopyOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}

// Verifies hal.device.queue.read by delegating to the shared queue fence
// check with this op's wait and signal fences.
LogicalResult DeviceQueueReadOp::verify() {
  auto waitFence = getWaitFence();
  auto signalFence = getSignalFence();
  return verifyDeviceQueueFences(*this, waitFence, signalFence);
}
Expand Down
132 changes: 132 additions & 0 deletions compiler/src/iree/compiler/Dialect/HAL/IR/HALOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1851,6 +1851,138 @@ def HAL_DeviceQueueDeallocaOp : HAL_Op<"device.queue.dealloca"> {
let hasVerifier = 1;
}

// Queue operation that fills a range of a device buffer with a repeating
// pattern value.
//
// Operands are the device, the queue affinity, the wait and signal fences,
// the target buffer with its offset, the length, and the pattern value. The
// flags are carried as a 64-bit integer attribute rather than an operand.
// The op produces no results.
def HAL_DeviceQueueFillOp : HAL_Op<"device.queue.fill"> {
  let summary = [{fills a buffer with a repeating pattern}];
  let description = [{
    The target buffer must be visible to the device queue performing the fill.
    In most cases the queue affinity should be set to where the target buffer
    will be consumed so that it has a chance of being cached.

    Note that individual queue transfer operations have a high overhead and they
    should be batched with other operations in command buffers.
  }];

  let arguments = (ins
    HAL_Device:$device,
    HAL_DeviceQueueAffinity:$queue_affinity,
    HAL_Fence:$wait_fence,
    HAL_Fence:$signal_fence,
    HAL_Buffer:$target_buffer,
    HAL_DeviceSize:$target_offset,
    HAL_DeviceSize:$length,
    HAL_FillPatternType:$pattern,
    I64Attr:$flags
  );
  let results = (outs);

  // Only the device, target buffer and pattern spell out their types in the
  // textual form; the remaining operands are printed without a type.
  let assemblyFormat = [{
    `<` $device `:` type($device) `>`
    `affinity` `(` $queue_affinity `)`
    `wait` `(` $wait_fence `)`
    `signal` `(` $signal_fence `)`
    `target` `(` $target_buffer `:` type($target_buffer) `)`
    `` `[` $target_offset `]`
    `length` `(` $length `)`
    `pattern` `(` $pattern `:` type($pattern) `)`
    `flags` `(` $flags `)`
    attr-dict-with-keyword
  }];

  // DeviceQueueFillOp::verify() in HALOps.cpp checks the wait/signal fences.
  let hasVerifier = 1;
}

// Queue operation that writes a range of a host buffer (Util_BufferType) into
// a range of a device buffer.
//
// Operands are the device, the queue affinity, the wait and signal fences,
// the source buffer with its offset, the target buffer with its offset, and
// the length. The flags are carried as a 64-bit integer attribute rather than
// an operand. The op produces no results.
def HAL_DeviceQueueUpdateOp : HAL_Op<"device.queue.update"> {
let summary = [{updates a buffer with the contents of a host buffer}];
let description = [{
The provided host source buffer will be captured and need not remain live or
unchanged while the operation is queued. The target buffer must be visible
to the device queue performing the update. In most cases the queue affinity
should be set to where the target buffer will be consumed so that it has a
chance of being cached.

Some implementations may have limits on the size of the update or may
perform poorly if the size is larger than an implementation-defined limit.
Updates should be kept as small and infrequent as possible.

Note that individual queue transfer operations have a high overhead and they
should be batched with other operations in command buffers.
}];

let arguments = (ins
HAL_Device:$device,
HAL_DeviceQueueAffinity:$queue_affinity,
HAL_Fence:$wait_fence,
HAL_Fence:$signal_fence,
Util_BufferType:$source_buffer,
HAL_DeviceSize:$source_offset,
HAL_Buffer:$target_buffer,
HAL_DeviceSize:$target_offset,
HAL_DeviceSize:$length,
I64Attr:$flags
);
let results = (outs);

// Only the device and the two buffers spell out their types in the textual
// form; the remaining operands are printed without a type.
let assemblyFormat = [{
`<` $device `:` type($device) `>`
`affinity` `(` $queue_affinity `)`
`wait` `(` $wait_fence `)`
`signal` `(` $signal_fence `)`
`source` `(` $source_buffer `:` type($source_buffer) `)`
`` `[` $source_offset `]`
`target` `(` $target_buffer `:` type($target_buffer) `)`
`` `[` $target_offset `]`
`length` `(` $length `)`
`flags` `(` $flags `)`
attr-dict-with-keyword
}];

// DeviceQueueUpdateOp::verify() in HALOps.cpp checks the wait/signal fences.
let hasVerifier = 1;
}

// Queue operation that copies a range of one device buffer into a range of
// another device buffer.
//
// Operands are the device, the queue affinity, the wait and signal fences,
// the source buffer with its offset, the target buffer with its offset, and
// the length. The flags are carried as a 64-bit integer attribute rather than
// an operand. The op produces no results.
def HAL_DeviceQueueCopyOp : HAL_Op<"device.queue.copy"> {
let summary = [{copies one device-visible buffer to another}];
let description = [{
The source buffer and target buffer must both be visible to the device
queue performing the copy. In most cases the queue affinity should be set to
where the target buffer will be consumed so that it has a chance of being
cached. The source buffer must have transfer-source usage and the target
buffer must have transfer-target usage.

Note that individual queue transfer operations have a high overhead and they
should be batched with other operations in command buffers.
}];

let arguments = (ins
HAL_Device:$device,
HAL_DeviceQueueAffinity:$queue_affinity,
HAL_Fence:$wait_fence,
HAL_Fence:$signal_fence,
HAL_Buffer:$source_buffer,
HAL_DeviceSize:$source_offset,
HAL_Buffer:$target_buffer,
HAL_DeviceSize:$target_offset,
HAL_DeviceSize:$length,
I64Attr:$flags
);
let results = (outs);

// Only the device and the two buffers spell out their types in the textual
// form; the remaining operands are printed without a type.
let assemblyFormat = [{
`<` $device `:` type($device) `>`
`affinity` `(` $queue_affinity `)`
`wait` `(` $wait_fence `)`
`signal` `(` $signal_fence `)`
`source` `(` $source_buffer `:` type($source_buffer) `)`
`` `[` $source_offset `]`
`target` `(` $target_buffer `:` type($target_buffer) `)`
`` `[` $target_offset `]`
`length` `(` $length `)`
`flags` `(` $flags `)`
attr-dict-with-keyword
}];

// DeviceQueueCopyOp::verify() in HALOps.cpp checks the wait/signal fences.
let hasVerifier = 1;
}

def HAL_DeviceQueueReadOp : HAL_Op<"device.queue.read"> {
let summary = [{reads a segment from a file into a device buffer}];
let description = [{
Expand Down
Loading

0 comments on commit 9650bfe

Please sign in to comment.