From 819a5aecac03755dcd742f0f86d66ef6ddd6354e Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Sat, 9 Nov 2024 00:12:45 +0000 Subject: [PATCH] Update text test, going to need a pattern for exact cancellations --- .../GPU/test/gpu_distribute_forall.mlir | 4 +-- .../test/gpu_distribute_shared_memory.mlir | 31 ++++++++----------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir index 12a8f6e3198f8..5ce8649db6fb5 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_forall.mlir @@ -95,11 +95,11 @@ func.func @distribute_thread_forall_single_thread(%out : memref) // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[TX:.+]] = gpu.thread_id x // CHECK-DAG: %[[TY:.+]] = gpu.thread_id y -// CHECK: %[[LINID:.+]] = affine.linearize_index disjoint [%[[TY]], %[[TX]]] by (2, 64) +// CHECK: %[[TFLAT:.+]] = affine.linearize_index disjoint [%[[TY]], %[[TX]]] by (2, 64) // CHECK-NOT: scf.for // CHECK: %[[TIDGUARD:.+]] = arith.cmpi slt, %[[TFLAT]], %[[C1]] // CHECK: scf.if %[[TIDGUARD]] { -// CHECK: memref.store {{.*}}[%[[LINID]]] +// CHECK: memref.store {{.*}}[%[[TFLAT]]] // ----- diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir index 49150024fe605..3c0ec578797a8 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_distribute_shared_memory.mlir @@ -49,12 +49,9 @@ module { } } -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 * 8 + s2 * 32 + s0 floordiv 4)> -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 
4) * 16)> -// CHECK-DAG: #[[$MAP2:.*]] = affine_map<()[s0, s1, s2] -> (s1 * 8 + s2 * 32 + s0 floordiv 4 + 32)> -// CHECK-DAG: #[[$MAP3:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 128)> -// CHECK-DAG: #[[$MAP4:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 128 + 128)> -// CHECK-DAG: #[[$MAP5:.*]] = affine_map<()[s0, s1, s2] -> (s0 * 4 + s1 * 128 + s2 * 512)> +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<()[s0] -> (s0 * 4)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 32)> +// CHECK-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 128)> // CHECK-LABEL: @shared_mem_cpy( // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index @@ -62,24 +59,22 @@ module { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TX:.*]] = gpu.thread_id x // CHECK-DAG: %[[TY:.*]] = gpu.thread_id y -// CHECK-DAG: %[[TZ:.*]] = gpu.thread_id z - -// CHECK-DAG: %[[Y0:.*]] = affine.apply #[[$MAP0]]()[%[[TX]], %[[TY]], %[[TZ]]] -// CHECK-DAG: %[[X0:.*]] = affine.apply #[[$MAP1]]()[%[[TX]]] -// CHECK: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[Y0]], %[[X0]]], %{{.*}} {in_bounds = [true, true]} : memref<64x16xf32>, vector<1x4xf32> -// CHECK: vector.transfer_write %[[R0]], %{{.*}}[%[[Y0]], %[[X0]]] {in_bounds = [true, true]} : vector<1x4xf32>, memref<64x16xf32, 3> -// CHECK-DAG: %[[Y1:.*]] = affine.apply #[[$MAP2]]()[%[[TX]], %[[TY]], %[[TZ]]] +// CHECK: %[[TFLAT:.*]] = affine.linearize_index disjoint [%[[TY]], %[[TX]]] by (4, 32) +// CHECK: %[[YX:.*]]:2 = affine.delinearize_index %[[TFLAT]] into (32, 4) +// CHECK: %[[X0:.*]] = affine.apply #[[$MAP0]]()[%[[YX]]#1] +// CHECK: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[YX]]#0, %[[X0]]], %{{.*}} {in_bounds = [true, true]} : memref<64x16xf32>, vector<1x4xf32> +// CHECK: vector.transfer_write %[[R0]], %{{.*}}[%[[YX]]#0, %[[X0]]] {in_bounds = [true, true]} : vector<1x4xf32>, memref<64x16xf32, 3> +// CHECK-DAG: %[[Y1:.*]] = affine.apply #[[$MAP1]]()[%[[YX]]#0] // CHECK: %[[R1:.*]] = vector.transfer_read 
%{{.*}}[%[[Y1]], %[[X0]]], %{{.*}} {in_bounds = [true, true]} : memref<64x16xf32>, vector<1x4xf32> // CHECK: vector.transfer_write %[[R1]], %{{.*}}[%[[Y1]], %[[X0]]] {in_bounds = [true, true]} : vector<1x4xf32>, memref<64x16xf32, 3> -// CHECK: %[[Y1:.*]] = affine.apply #[[$MAP3]]()[%[[TX]], %[[TY]], %[[TZ]]] -// CHECK: %[[R2:.*]] = vector.transfer_read %{{.*}}[%[[Y1]], %[[C0]]], %{{.*}} {in_bounds = [true, true]} : memref<256x4xf32>, vector<1x4xf32> -// CHECK: vector.transfer_write %[[R2]], %{{.*}}[%[[Y1]], %[[C0]]] {in_bounds = [true, true]} : vector<1x4xf32>, memref<256x4xf32, 3> -// CHECK: %[[Y2:.*]] = affine.apply #[[$MAP4]]()[%[[TX]], %[[TY]], %[[TZ]]] +// CHECK: %[[R2:.*]] = vector.transfer_read %{{.*}}[%[[TFLAT]], %[[C0]]], %{{.*}} {in_bounds = [true, true]} : memref<256x4xf32>, vector<1x4xf32> +// CHECK: vector.transfer_write %[[R2]], %{{.*}}[%[[TFLAT]], %[[C0]]] {in_bounds = [true, true]} : vector<1x4xf32>, memref<256x4xf32, 3> +// CHECK: %[[Y2:.*]] = affine.apply #[[$MAP2]]()[%[[TFLAT]]] // CHECK: %[[R3:.*]] = vector.transfer_read %{{.*}}[%[[Y2]], %[[C0]]], %{{.*}} {in_bounds = [true, true]} : memref<256x4xf32>, vector<1x4xf32> // CHECK: vector.transfer_write %[[R3]], %{{.*}}[%[[Y2]], %[[C0]]] {in_bounds = [true, true]} : vector<1x4xf32>, memref<256x4xf32, 3> -// CHECK: %[[X1:.*]] = affine.apply #[[$MAP5]]()[%[[TX]], %[[TY]], %[[TZ]]] +// CHECK: %[[X1:.*]] = affine.apply #[[$MAP0]]()[%[[TFLAT]]] // CHECK: %[[R4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[X1]]], %{{.*}} {in_bounds = [true, true]} : memref<3x512xf32>, vector<1x4xf32> // CHECK: vector.transfer_write %[[R4]], %{{.*}}[%[[C0]], %[[X1]]] {in_bounds = [true, true]} : vector<1x4xf32>, memref<3x512xf32, 3> // CHECK: %[[R5:.*]] = vector.transfer_read %{{.*}}[%[[C1]], %[[X1]]], %{{.*}} {in_bounds = [true, true]} : memref<3x512xf32>, vector<1x4xf32>