diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx1100.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx1100.mlir index f6e944544ec6..2a6b9c62d6c3 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx1100.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx1100.mlir @@ -50,8 +50,8 @@ func.func @matmul_lowering_WMMA_F32_16x16x16_F16() { // CHECK-DAG: %[[LHS_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(0) // CHECK-DAG: %[[RHS_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(1) // CHECK-DAG: %[[ACC_BINDING:.+]] = hal.interface.binding.subspan {{.+}} binding(2) -// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]{{.+}} -> tensor -// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]{{.+}} -> tensor +// CHECK-DAG: %[[LHS:.+]] = flow.dispatch.tensor.load %[[LHS_BINDING]]{{.+}} -> tensor +// CHECK-DAG: %[[RHS:.+]] = flow.dispatch.tensor.load %[[RHS_BINDING]]{{.+}} -> tensor // CHECK-DAG: %[[ACC:.+]] = flow.dispatch.tensor.load %[[ACC_BINDING]]{{.+}} -> tensor // CHECK: %[[MMA:.+]] = iree_gpu.multi_mma %[[LHS]], %[[RHS]], %[[ACC]] // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]], diff --git a/compiler/src/iree/compiler/Codegen/Common/TileSwizzle.h b/compiler/src/iree/compiler/Codegen/Common/TileSwizzle.h index 738bb6a43e94..82eff5965521 100644 --- a/compiler/src/iree/compiler/Codegen/Common/TileSwizzle.h +++ b/compiler/src/iree/compiler/Codegen/Common/TileSwizzle.h @@ -49,6 +49,10 @@ struct TileSwizzle { // The size of the dimension. int16_t size = 0; + + // Support constructing from any size type. + template + Dim(Kind kind, T size) : kind(kind), size(size) {} }; using ExpandShapeDimVectorType = llvm::SmallVector; diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPUTileSwizzleUtils.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPUTileSwizzleUtils.cpp index ae9d5d9b6188..1171b1e88c2f 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPUTileSwizzleUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPUTileSwizzleUtils.cpp @@ -3,85 +3,77 @@ // Licensed under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - #include "iree/compiler/Codegen/Dialect/GPU/IR/GPUTileSwizzleUtils.h" - namespace mlir::iree_compiler { -// Given an `expandShape` vector-of-vectors describing the mapping from source -// dimensions to expanded dimensions, returns the index of the first expanded -// dimension corresponding to the given source dimension index. -static int64_t -getExpandedDimFirstIdx(const TileSwizzle::ExpandShapeType &expandShape, - int64_t srcIndex) { - int dstIndexFirst = 0; - for (int i = 0; i < srcIndex; ++i) { - dstIndexFirst += expandShape[i].size(); +using Kind = TileSwizzle::Dim::Kind; + +// Returns the index of the first destination dimension corresponding to the +// given source dimension `srcIdx`. +static int64_t expandedDimIdx(const TileSwizzle::ExpandShapeType &expandShape, + int srcIdx) { + int dstIdx = 0; + for (int i = 0; i < srcIdx; ++i) { + dstIdx += expandShape[i].size(); } - return dstIndexFirst; + return dstIdx; } -void unroll(TileSwizzle &swizzle, int srcIndex, int unrollFactor, - TileSwizzle::Dim::Kind kind) { - assert(unrollFactor > 1); - int dstIndexFirst = getExpandedDimFirstIdx(swizzle.expandShape, srcIndex); - TileSwizzle::Dim unrollDim; - unrollDim.size = unrollFactor; - unrollDim.kind = kind; +// Pushes `dim` to the front of `swizzle.expandShape[srcIdx]`, and updates +// `swizzle.permutation` to make the new dimension outer-most among the dims in +// `swizzle.expandShape[srcIdx]`. +// +// This can be used to unroll a kernel with kind = CrossIntrinsic, +// or to expand a kernel to multiple subgroups with kind = CrossThread. +// +// Example: +// Input swizzle = { expandShape = [[16], [4]], permutation = [1, 0] } +// Input srcIdx = 1 +// Input dim.size = 4 +// -> Output swizzle = { expandShape = [[16], [4, 4]], permutation = [1, 2, 0] } +// +static void expand(TileSwizzle &swizzle, int srcIdx, TileSwizzle::Dim dim) { + int dstIdx = expandedDimIdx(swizzle.expandShape, srcIdx); // The new unrolling dimension is inserted at the start of the expandShape - // dimensions group corresponding to srcIndex. - swizzle.expandShape[srcIndex].insert(swizzle.expandShape[srcIndex].begin(), - unrollDim); + // dimensions group corresponding to srcIdx. + swizzle.expandShape[srcIdx].insert(swizzle.expandShape[srcIdx].begin(), dim); // Since we are not interleaving here, generating side-by-side copies of the // original layout, the new unrolling dimension is the new outermost // dimension. Existing entries get shifted to make room for it. for (auto &p : swizzle.permutation) { - p += (p >= dstIndexFirst); + p += (p >= dstIdx); } - swizzle.permutation.insert(swizzle.permutation.begin(), dstIndexFirst); + swizzle.permutation.insert(swizzle.permutation.begin(), dstIdx); } -void interleave(TileSwizzle &swizzle, int srcIndex, - int expandedDimIndexToInterleaveAt) { - // Compute which inner dimension to permute the current outer dimension into. - int dstIndexFirst = getExpandedDimFirstIdx(swizzle.expandShape, srcIndex); - int dstIndexToInterleaveAt = dstIndexFirst + expandedDimIndexToInterleaveAt; - +// Interleaves the layout in `swizzle` by mutating `swizzle.permutation` to +// move permutation[0], the outer-most dimension (which the unroll() function +// created to be the unrolling dimension), to the inner dimension given by +// `expandedIdx`. +// +// Example: +// Input swizzle = { expandShape = [[16], [4, 4]], permutation = [1, 2, 0] } +// Input srcIdx = 1 +// Input expandedIdx = 1 +// -> Output swizzle = { expandShape = [[16], [4, 4]], permutation = [2, 0, 1] } +// +static void interleave(TileSwizzle &swizzle, int srcIdx, int expandedIdx) { + int dstIdx = expandedDimIdx(swizzle.expandShape, srcIdx) + expandedIdx; SmallVector outPermutation(swizzle.permutation.size()); // The leading dimension, permutation[0], gets moved inwards to the - // position that we just computed, dstIndexToInterleaveAt. - outPermutation[dstIndexToInterleaveAt] = swizzle.permutation[0]; + // position that we just computed, dstIdx. + outPermutation[dstIdx] = swizzle.permutation[0]; // Outer dimensions get shifted outwards to fill the gap. - for (int i = 0; i < dstIndexToInterleaveAt; ++i) { + for (int i = 0; i < dstIdx; ++i) { outPermutation[i] = swizzle.permutation[i + 1]; } - // Inner dimensions don't change. That is to say that we only interleave - // at `targetInterleavedElements` granularity, we don't swizzle further - // internally to that. - for (int i = dstIndexToInterleaveAt + 1; i < outPermutation.size(); ++i) { + // Inner dimensions don't change. + for (int i = dstIdx + 1; i < outPermutation.size(); ++i) { outPermutation[i] = swizzle.permutation[i]; } swizzle.permutation = outPermutation; } -// Returns the permutation of indices that sorts `v` with the given comparator. -template