From 8549c5438ef7a1173980ec9a962c44780411638f Mon Sep 17 00:00:00 2001 From: mshahid Date: Tue, 4 Feb 2025 06:51:09 -0800 Subject: [PATCH 1/5] Enable vector to amx code generation and execution. Adds support to check and enable amx-bf16 feature using libxsmm platform setup API. Updates default pipeline to enable vector to amx lowering based on target feature. --- include/TPP/Transforms/Utils/VNNIUtils.h | 4 +++- lib/TPP/DefaultPipeline.cpp | 5 ++++- lib/TPP/Transforms/Utils/VNNIUtils.cpp | 6 ++++++ .../tpp-run-amx-feature-initialization.mlir | 19 +++++++++++++++++++ tools/tpp-run/tpp-run.cpp | 3 +++ 5 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 test/Integration/tpp-run-amx-feature-initialization.mlir diff --git a/include/TPP/Transforms/Utils/VNNIUtils.h b/include/TPP/Transforms/Utils/VNNIUtils.h index d5d12a6f6..8978f3cba 100644 --- a/include/TPP/Transforms/Utils/VNNIUtils.h +++ b/include/TPP/Transforms/Utils/VNNIUtils.h @@ -28,7 +28,6 @@ class LinalgOp; namespace vnni { namespace utils { - enum class VnniOperandRank { TRANSPOSE = 3, GEMM = 3, @@ -36,6 +35,9 @@ enum class VnniOperandRank { BRGEMM_OUTS = 3 }; +// Returns True if the current architecture supports AMX instructions. +bool hasAMX(); + // Return the VNNI blocking factor if it can be determined for the given type or // zero, otherwise. // Optionally, an operation can be provided to give access to DLTI. 
diff --git a/lib/TPP/DefaultPipeline.cpp b/lib/TPP/DefaultPipeline.cpp index b9eefa786..cdf84a79b 100644 --- a/lib/TPP/DefaultPipeline.cpp +++ b/lib/TPP/DefaultPipeline.cpp @@ -22,6 +22,7 @@ #include "TPP/Dialect/Perf/PerfOps.h" #include "TPP/Dialect/Xsmm/XsmmDialect.h" #include "TPP/PassUtils.h" +#include "TPP/Transforms/Utils/VNNIUtils.h" #include "mlir/Transforms/Passes.h" #include @@ -187,7 +188,9 @@ struct DefaultPipeline : public tpp::impl::DefaultPipelineBase, pm.addPass(createPrintIRPass()); // Lower to LLVM - pm.addPass(createConvertVectorToLLVMPass()); + ConvertVectorToLLVMPassOptions options; + options.amx = vnni::utils::hasAMX() ? true : false; + pm.addPass(createConvertVectorToLLVMPass(options)); pm.addPass(createFinalizeMemRefToLLVMConversionPass()); pm.addPass(createConvertSCFToCFPass()); if (defParallel) diff --git a/lib/TPP/Transforms/Utils/VNNIUtils.cpp b/lib/TPP/Transforms/Utils/VNNIUtils.cpp index 87f290e25..1e14002b0 100644 --- a/lib/TPP/Transforms/Utils/VNNIUtils.cpp +++ b/lib/TPP/Transforms/Utils/VNNIUtils.cpp @@ -22,6 +22,12 @@ namespace mlir { namespace vnni { namespace utils { +// Returns True if the current architecture supports AMX instructions. 
+bool hasAMX() { + return (libxsmm_get_target_archid() >= LIBXSMM_X86_AVX512_SPR) && + (libxsmm_get_target_archid() < LIBXSMM_X86_ALLFEAT); +} + unsigned getVnniBlockingFactor(Type type, Operation *op) { unsigned blockingFactor = 0; diff --git a/test/Integration/tpp-run-amx-feature-initialization.mlir b/test/Integration/tpp-run-amx-feature-initialization.mlir new file mode 100644 index 000000000..b1de9f593 --- /dev/null +++ b/test/Integration/tpp-run-amx-feature-initialization.mlir @@ -0,0 +1,19 @@ +// RUN: not --crash tpp-run %s -e entry -entry-point-result=void -mattr=amx-bf16 2>&1 | FileCheck %s --check-prefix=CHECK-AMX-BF16 +// RUN: not --crash env LIBXSMM_TARGET=spr tpp-run %s -e entry -entry-point-result=void -mattr=amx-bf16 2>&1 | FileCheck %s --check-prefix=CHECK-AMX-BF16-SETUP + +//Tests for unsuccessfull compilation implying AMX pipeline was not initialized +// CHECK-AMX-BF16: error: LLVM Translation failed for operation: builtin.unrealized_conversion_cast + +//Tests for successfull compilation implying AMX pipeline was initialized properly. 
+// CHECK-AMX-BF16-SETUP-NOT: error: LLVM Translation failed for operation: builtin.unrealized_conversion_cast +func.func @entry(%arg0: memref<16x32xbf16>, + %arg1: memref<16x32xbf16>, + %arg2: memref<16x16xf32>) { + %0 = arith.constant 0 : index + %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xbf16> into !amx.tile<16x32xbf16> + %2 = amx.tile_load %arg1[%0, %0] : memref<16x32xbf16> into !amx.tile<16x32xbf16> + %3 = amx.tile_zero : !amx.tile<16x16xf32> + %4 = amx.tile_mulf %1, %2, %3 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32> + amx.tile_store %arg2[%0, %0], %4 : memref<16x16xf32>, !amx.tile<16x16xf32> + return +} \ No newline at end of file diff --git a/tools/tpp-run/tpp-run.cpp b/tools/tpp-run/tpp-run.cpp index 7db7f81c2..6ccf3d909 100644 --- a/tools/tpp-run/tpp-run.cpp +++ b/tools/tpp-run/tpp-run.cpp @@ -23,6 +23,7 @@ #include "llvm/Target/TargetOptions.h" #include "TPP/Transforms/Utils/TensorInit.h" +#include "libxsmm.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" @@ -270,6 +271,8 @@ int main(int argc, char **argv) { if (failed(validateInput())) return 1; + // Initialize the underlying platform + libxsmm_init(); // Initialize the LLVM machinery llvm::InitLLVM y(argc, argv); llvm::InitializeNativeTarget(); From 069d64b81c665e02a3a1b4b4d55bdea8fa35206a Mon Sep 17 00:00:00 2001 From: mshahid Date: Fri, 14 Feb 2025 04:48:59 -0800 Subject: [PATCH 2/5] -Fixes the failure of test by considering the fact that CI machine supports amx-bf16 natively. 
--- .../tpp-run-amx-feature-initialization.mlir | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/Integration/tpp-run-amx-feature-initialization.mlir b/test/Integration/tpp-run-amx-feature-initialization.mlir index b1de9f593..3735cebd4 100644 --- a/test/Integration/tpp-run-amx-feature-initialization.mlir +++ b/test/Integration/tpp-run-amx-feature-initialization.mlir @@ -1,11 +1,14 @@ -// RUN: not --crash tpp-run %s -e entry -entry-point-result=void -mattr=amx-bf16 2>&1 | FileCheck %s --check-prefix=CHECK-AMX-BF16 +// RUN: not --crash env LIBXSMM_TARGET=spr tpp-run %s -e entry -entry-point-result=void -mattr=-amx-bf16 2>&1 | FileCheck %s --check-prefix=CHECK-AMX-BF16 // RUN: not --crash env LIBXSMM_TARGET=spr tpp-run %s -e entry -entry-point-result=void -mattr=amx-bf16 2>&1 | FileCheck %s --check-prefix=CHECK-AMX-BF16-SETUP -//Tests for unsuccessfull compilation implying AMX pipeline was not initialized -// CHECK-AMX-BF16: error: LLVM Translation failed for operation: builtin.unrealized_conversion_cast +//Tests for unsuccessfull compilation in absence of 'amx-bf16' feature +// CHECK-AMX-BF16: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tdpbf16ps.internal + +//Tests for successfull compilation in presence of 'amx-bf16' but fails to run due to unsupported instruction. +// CHECK-AMX-BF16-SETUP-NOT: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tdpbf16ps.internal +// CHECK-AMX-BF16-SETUP: Illegal instruction + -//Tests for successfull compilation implying AMX pipeline was initialized properly. -// CHECK-AMX-BF16-SETUP-NOT: error: LLVM Translation failed for operation: builtin.unrealized_conversion_cast func.func @entry(%arg0: memref<16x32xbf16>, %arg1: memref<16x32xbf16>, %arg2: memref<16x16xf32>) { From de06a60a235a5890ef1d845046dccc1823fd6a1f Mon Sep 17 00:00:00 2001 From: mshahid Date: Mon, 17 Feb 2025 02:58:28 -0800 Subject: [PATCH 3/5] -Simplifies the target feature initialization option. 
Adds a TODO comment to plug in the target platform initialization using the target descriptor. --- lib/TPP/DefaultPipeline.cpp | 2 +- tools/tpp-run/tpp-run.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/TPP/DefaultPipeline.cpp b/lib/TPP/DefaultPipeline.cpp index cdf84a79b..45488eb30 100644 --- a/lib/TPP/DefaultPipeline.cpp +++ b/lib/TPP/DefaultPipeline.cpp @@ -189,7 +189,7 @@ struct DefaultPipeline : public tpp::impl::DefaultPipelineBase, // Lower to LLVM ConvertVectorToLLVMPassOptions options; - options.amx = vnni::utils::hasAMX() ? true : false; + options.amx = vnni::utils::hasAMX(); pm.addPass(createConvertVectorToLLVMPass(options)); pm.addPass(createFinalizeMemRefToLLVMConversionPass()); pm.addPass(createConvertSCFToCFPass()); diff --git a/tools/tpp-run/tpp-run.cpp b/tools/tpp-run/tpp-run.cpp index 6ccf3d909..7a2a7d7f1 100644 --- a/tools/tpp-run/tpp-run.cpp +++ b/tools/tpp-run/tpp-run.cpp @@ -272,6 +272,7 @@ int main(int argc, char **argv) { return 1; // Initialize the underlying platform + // TODO: Move this to use the target information flags libxsmm_init(); // Initialize the LLVM machinery llvm::InitLLVM y(argc, argv); From 5418d6dde06e9fbce86c6947ec5e63d37e3cce4c Mon Sep 17 00:00:00 2001 From: mshahid Date: Mon, 17 Feb 2025 21:47:28 -0800 Subject: [PATCH 4/5] -Adds tpp-opt -default-pipeline compilation test for amx-bf16 feature. -Removes the fragile test from integration. 
--- .../tpp-run-amx-feature-initialization.mlir | 22 ------------------ .../DefaultPipeline/amx-initialization.mlir | 23 +++++++++++++++++++ 2 files changed, 23 insertions(+), 22 deletions(-) delete mode 100644 test/Integration/tpp-run-amx-feature-initialization.mlir create mode 100644 test/Passes/DefaultPipeline/amx-initialization.mlir diff --git a/test/Integration/tpp-run-amx-feature-initialization.mlir b/test/Integration/tpp-run-amx-feature-initialization.mlir deleted file mode 100644 index 3735cebd4..000000000 --- a/test/Integration/tpp-run-amx-feature-initialization.mlir +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: not --crash env LIBXSMM_TARGET=spr tpp-run %s -e entry -entry-point-result=void -mattr=-amx-bf16 2>&1 | FileCheck %s --check-prefix=CHECK-AMX-BF16 -// RUN: not --crash env LIBXSMM_TARGET=spr tpp-run %s -e entry -entry-point-result=void -mattr=amx-bf16 2>&1 | FileCheck %s --check-prefix=CHECK-AMX-BF16-SETUP - -//Tests for unsuccessfull compilation in absence of 'amx-bf16' feature -// CHECK-AMX-BF16: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tdpbf16ps.internal - -//Tests for successfull compilation in presence of 'amx-bf16' but fails to run due to unsupported instruction. 
-// CHECK-AMX-BF16-SETUP-NOT: LLVM ERROR: Cannot select: intrinsic %llvm.x86.tdpbf16ps.internal -// CHECK-AMX-BF16-SETUP: Illegal instruction - - -func.func @entry(%arg0: memref<16x32xbf16>, - %arg1: memref<16x32xbf16>, - %arg2: memref<16x16xf32>) { - %0 = arith.constant 0 : index - %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xbf16> into !amx.tile<16x32xbf16> - %2 = amx.tile_load %arg1[%0, %0] : memref<16x32xbf16> into !amx.tile<16x32xbf16> - %3 = amx.tile_zero : !amx.tile<16x16xf32> - %4 = amx.tile_mulf %1, %2, %3 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32> - amx.tile_store %arg2[%0, %0], %4 : memref<16x16xf32>, !amx.tile<16x16xf32> - return -} \ No newline at end of file diff --git a/test/Passes/DefaultPipeline/amx-initialization.mlir b/test/Passes/DefaultPipeline/amx-initialization.mlir new file mode 100644 index 000000000..1ecad3853 --- /dev/null +++ b/test/Passes/DefaultPipeline/amx-initialization.mlir @@ -0,0 +1,23 @@ +// RUN: tpp-opt --default-pipeline %s | FileCheck %s +// RUN: LIBXSMM_TARGET=spr tpp-opt --default-pipeline %s | FileCheck %s --check-prefix=CHECK-AMX-BF16 + +// CHECK-LABEL: llvm.func @entry +// CHECK: builtin.unrealized_conversion_cast +// CHECK: amx.tile_mulf +// CHECK-NOT: amx.tilestored64 + +// CHECK-AMX-BF16-LABEL: llvm.func @entry +// CHECK-AMX-BF16: amx.tileloadd64 +// CHECK-AMX-BF16: amx.tdpbf16ps +// CHECK-AMX-BF16: amx.tilestored64 +func.func @entry(%arg0: memref<16x32xbf16>, + %arg1: memref<16x32xbf16>, + %arg2: memref<16x16xf32>) { + %0 = arith.constant 0 : index + %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xbf16> into !amx.tile<16x32xbf16> + %2 = amx.tile_load %arg1[%0, %0] : memref<16x32xbf16> into !amx.tile<16x32xbf16> + %3 = amx.tile_zero : !amx.tile<16x16xf32> + %4 = amx.tile_mulf %1, %2, %3 : !amx.tile<16x32xbf16>, !amx.tile<16x32xbf16>, !amx.tile<16x16xf32> + amx.tile_store %arg2[%0, %0], %4 : memref<16x16xf32>, !amx.tile<16x16xf32> + return +} From 0d279ef41a42ace1ec0de6ef4298ca5782ad8bc0 Mon 
Sep 17 00:00:00 2001 From: mshahid Date: Mon, 17 Feb 2025 22:30:47 -0800 Subject: [PATCH 5/5] -Removes negative test checks as they are host-dependent --- test/Passes/DefaultPipeline/amx-initialization.mlir | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/Passes/DefaultPipeline/amx-initialization.mlir b/test/Passes/DefaultPipeline/amx-initialization.mlir index 1ecad3853..29e8349c5 100644 --- a/test/Passes/DefaultPipeline/amx-initialization.mlir +++ b/test/Passes/DefaultPipeline/amx-initialization.mlir @@ -1,10 +1,6 @@ -// RUN: tpp-opt --default-pipeline %s | FileCheck %s + // RUN: LIBXSMM_TARGET=spr tpp-opt --default-pipeline %s | FileCheck %s --check-prefix=CHECK-AMX-BF16 -// CHECK-LABEL: llvm.func @entry -// CHECK: builtin.unrealized_conversion_cast -// CHECK: amx.tile_mulf -// CHECK-NOT: amx.tilestored64 // CHECK-AMX-BF16-LABEL: llvm.func @entry // CHECK-AMX-BF16: amx.tileloadd64