Skip to content

Commit

Permalink
[tuner]:Add a utility function to query supported MMA intrinsics
Browse files Browse the repository at this point in the history
Signed-off-by: Bangtian Liu <[email protected]>
  • Loading branch information
bangtianliu committed Nov 12, 2024
1 parent fa6aa1c commit 8d86018
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 0 deletions.
1 change: 1 addition & 0 deletions compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ iree_compiler_cc_library(
"ROCDLKernelConfig.cpp",
"ROCDLLowerExecutableTarget.cpp",
"ROCDLSelectLoweringStrategy.cpp",
"TestLLVMGPUQueryMMAPass.cpp",
"Verifiers.cpp",
],
hdrs = [
Expand Down
1 change: 1 addition & 0 deletions compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ iree_cc_library(
"ROCDLKernelConfig.cpp"
"ROCDLLowerExecutableTarget.cpp"
"ROCDLSelectLoweringStrategy.cpp"
"TestLLVMGPUQueryMMAPass.cpp"
"Verifiers.cpp"
DEPS
::PassHeaders
Expand Down
5 changes: 5 additions & 0 deletions compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,9 @@ def TestLLVMGPUScalarizeMathOpPass :
let summary = "Test pass for several legalization patterns.";
}

def TestLLVMGPUQueryMMAPass :
Pass<"iree-test-llvmgpu-query-mma", "ModuleOp"> {
let summary = "Test pass for querying the supported mma intrinsic instructions.";
}

#endif // IREE_CODEGEN_LLVMGPU_PASSES
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree/compiler/Codegen/LLVMGPU/Passes.h"
#include "iree/compiler/Codegen/Utils/GPUUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"

#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "iree-test-llvmgpu-query-mma"

namespace mlir::iree_compiler {

#define GEN_PASS_DEF_TESTLLVMGPUQUERYMMAPASS
#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc"

static void printMMAVector(SmallVector<IREE::GPU::MMAAttr> &mmaAttrs,
const std::string &extraMessage = {}) {
llvm::outs() << "Printing MMA Collection" << extraMessage
<< ", size: " << mmaAttrs.size() << "\n";
for (const auto &mma : mmaAttrs) {
llvm::outs() << mma << " ";
}
llvm::outs() << "\n";
}

namespace {

struct TestLLVMGPUQueryMMAPass final
: impl::TestLLVMGPUQueryMMAPassBase<TestLLVMGPUQueryMMAPass> {
void runOnOperation() override {
ModuleOp moduleOp = getOperation();
SmallVector<IREE::GPU::MMAAttr> mmaCollecton;
// Print mma vector before collection.
printMMAVector(mmaCollecton,
" Before querying supported mma instrinsic instructions");
// Collect mma intrinsic instructions.
QueryMMAIntrinsics(moduleOp, mmaCollecton);
// Print mma vector after collection.
printMMAVector(mmaCollecton,
" After querying supported mma instrinsic instructions");
}
};
} // namespace
} // namespace mlir::iree_compiler
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ iree_lit_test_suite(
"promote_matmul_to_fit_mma.mlir",
"tensor_pad.mlir",
"tensorcore_vectorization.mlir",
"test_query_mma.mlir",
"transform_dialect_bufferize.mlir",
"transform_dialect_eliminate_gpu_barriers.mlir",
"transform_dialect_hoist_allocs.mlir",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ iree_lit_test_suite(
"rocdl_pipeline_test.mlir"
"tensor_pad.mlir"
"tensorcore_vectorization.mlir"
"test_query_mma.mlir"
"transform_dialect_bufferize.mlir"
"transform_dialect_eliminate_gpu_barriers.mlir"
"transform_dialect_hoist_allocs.mlir"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// RUN: iree-opt --split-input-file --iree-test-llvmgpu-query-mma %s | FileCheck %s

#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb",
{abi = "hip", iree.gpu.target = #iree_gpu.target<arch = "gfx942", features = "",
wgp = <compute = fp64|fp32|fp16|int64|int32|int16|int8, storage = b64|b32|b16|b8,
subgroup = shuffle|arithmetic, dot = dp4xi8toi32,
mma = [<MFMA_F32_16x16x4_F32>, <MFMA_F32_16x16x16_F16>, <MFMA_F32_32x32x8_F16>,
<MFMA_F32_16x16x16_BF16>, <MFMA_F32_32x32x8_BF16>, <MFMA_F32_16x16x32_F8E4M3FNUZ>,
<MFMA_F32_16x16x32_F8E5M2FNUZ>, <MFMA_I32_16x16x32_I8>, <MFMA_I32_32x32x16_I8>],
subgroup_size_choices = [64], max_workgroup_sizes = [1024, 1024, 1024],
max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536,
max_workgroup_counts = [2147483647, 2147483647, 2147483647], max_load_instruction_bits = 128,
simds_per_wgp = 4, vgpr_space_bits = 16384>>, ukernels = "none", waves_per_eu = 2 : i64}>
#pipeline_layout = #hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer>]>
module {
hal.executable private @main {
hal.executable.variant public @main target(#executable_target_rocm_hsaco_fb) {
hal.executable.export public @entry_point layout(#pipeline_layout)
builtin.module {
func.func @fn() {
return
}
}
}
}
}

// CHECK: Printing MMA Collection Before querying supported mma instrinsic instructions, size: 0
// CHECK: Printing MMA Collection After querying supported mma instrinsic instructions, size: 9
// CHECK: MFMA_F32_16x16x4_F32
// CHECK-SAME: MFMA_F32_16x16x16_F16
// CHECK-SAME: MFMA_F32_32x32x8_F16
// CHECK-SAME: MFMA_F32_16x16x16_BF16
// CHECK-SAME: MFMA_F32_32x32x8_BF16
// CHECK-SAME: MFMA_F32_16x16x32_F8E4M3FNUZ
// CHECK-SAME: MFMA_F32_16x16x32_F8E5M2FNUZ
// CHECK-SAME: MFMA_I32_16x16x32_I8
// CHECK-SAME: MFMA_I32_32x32x16_I8
// CHECK-LABEL: func.func @fn

// -----

#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb">
#pipeline_layout = #hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer>]>
module {
hal.executable private @main {
hal.executable.variant public @main target(#executable_target_rocm_hsaco_fb) {
hal.executable.export public @entry_point layout(#pipeline_layout)
builtin.module {
func.func @fn_empty() {
return
}
}
}
}
}

// CHECK: Printing MMA Collection Before querying supported mma instrinsic instructions, size: 0
// CHECK: Printing MMA Collection After querying supported mma instrinsic instructions, size: 0
// CHECK-LABEL: func.func @fn_empty
24 changes: 24 additions & 0 deletions compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/BuiltinTypes.h"
Expand Down Expand Up @@ -1028,4 +1029,27 @@ std::optional<int> getGPUSubgroupSize(mlir::FunctionOpInterface func) {
return std::nullopt;
}

void QueryMMAIntrinsics(mlir::ModuleOp moduleOp,
SmallVector<IREE::GPU::MMAAttr> &mmaAttrs) {
IREE::GPU::TargetAttr target;

// Walk through all `func::FuncOp` operations in `moduleOp`.
moduleOp.walk([&](func::FuncOp funcOp) {
if (auto attr = getGPUTargetAttr(funcOp)) {
// Store the target attribute if found.
target = attr;
return WalkResult::interrupt();
}
return WalkResult::advance();
});

if (target) {
// Append each MMA attribute from the target's `Wgp` configuration to
// `mmaAttrs`.
for (IREE::GPU::MMAAttr mma : target.getWgp().getMma()) {
mmaAttrs.emplace_back(mma);
}
}
}

} // namespace mlir::iree_compiler
5 changes: 5 additions & 0 deletions compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,11 @@ IREE::GPU::TargetAttr getGPUTargetAttr(Operation *op);
/// Returns std::nullopt if none found.
std::optional<int> getGPUSubgroupSize(mlir::FunctionOpInterface func);

/// Returns supported MMA intrinsic instructions based on the GPU target
/// description stored in `moduleOp` and populates them in `mmaAttrs`.
void QueryMMAIntrinsics(mlir::ModuleOp moduleOp,
SmallVector<IREE::GPU::MMAAttr> &mmaAttrs);

} // namespace mlir::iree_compiler

#endif // IREE_COMPILER_CODEGEN_UTILS_GPUUTILS_H_

0 comments on commit 8d86018

Please sign in to comment.