From 3c11b025cf2d981743b7b00900b2ac5888568a62 Mon Sep 17 00:00:00 2001 From: Quinn Dawkins Date: Fri, 14 Feb 2025 11:34:46 -0500 Subject: [PATCH] Changes necessary for https://github.com/llvm/llvm-project/pull/123902 --- .../materialize_homogeneous_encodings.mlir | 4 +- .../Codegen/Common/CPU/CPULowerToUKernels.cpp | 8 +- .../Codegen/Common/CPU/CPUPrepareUkernels.cpp | 24 ++--- .../Common/CPU/test/lower_to_ukernel_ops.mlir | 24 ++--- .../Common/CPU/test/prepare_ukernels.mlir | 20 ++-- .../ConvertToDestinationPassingStylePass.cpp | 6 +- .../Codegen/Common/DecomposePackUnPackOps.cpp | 40 ++++---- .../compiler/Codegen/Common/EncodingUtils.cpp | 2 +- .../GPU/test/gpu_fuse_and_hoist_forall.mlir | 4 +- .../GPU/test/gpu_pack_to_instrinsics.mlir | 6 +- .../Codegen/Common/GenericVectorization.cpp | 4 +- .../Codegen/Common/MaterializeEncoding.cpp | 3 +- .../Common/MaterializeEncodingPatterns.cpp | 10 +- .../Codegen/Common/TileInferenceUtils.cpp | 18 ++-- .../convert_to_destination_passing_style.mlir | 12 +-- .../decompose_boundary_pack_unpack_ops.mlir | 48 +++++----- .../test/decompose_pack_unpack_ops.mlir | 28 +++--- .../Common/test/generic_vectorization.mlir | 18 ++-- .../test/gpu_materialize_encoding_gfx942.mlir | 24 ++--- .../test/iree_comprehensive_bufferize.mlir | 6 +- .../test/llvmcpu_materialize_encoding.mlir | 94 +++++++++---------- .../tile_and_distribute_to_workgroups.mlir | 26 ++--- .../test/vmvx_materialize_encoding.mlir | 4 +- .../Dialect/Codegen/IR/IREECodegenTypes.h | 2 +- .../Interfaces/BufferizationInterfaces.cpp | 17 ++-- .../PartitionableLoopsInterface.cpp | 8 +- .../Codegen/LLVMCPU/KernelDispatch.cpp | 28 +++--- .../compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp | 4 +- .../compiler/Codegen/LLVMCPU/test/peel.mlir | 6 +- .../test/pipeline_pack_unpack_tests.mlir | 6 +- .../Codegen/LLVMCPU/test/pipeline_tests.mlir | 2 +- .../select_aarch64_lowering_strategy.mlir | 12 +-- .../test/select_x86_64_lowering_strategy.mlir | 42 ++++----- .../tile-root-fuse-consumer-producer.mlir | 4 +- .../compiler/Codegen/LLVMGPU/KernelConfig.cpp | 12 +-- .../Codegen/LLVMGPU/LLVMGPUTensorPad.cpp | 12 +-- .../iree/compiler/Codegen/LLVMGPU/Passes.cpp | 4 +- .../test/ROCDL/config_tile_and_fuse.mlir | 10 +- .../LLVMGPU/test/gpu_set_num_workgroups.mlir | 4 +- .../LLVMGPU/test/pack_pipeline_test.mlir | 2 +- .../Codegen/LLVMGPU/test/tensor_pad.mlir | 4 +- .../iree/compiler/Codegen/Utils/CPUUtils.cpp | 4 +- .../src/iree/compiler/Codegen/Utils/Utils.cpp | 8 +- .../src/iree/compiler/Codegen/Utils/Utils.h | 8 +- .../VMVX/test/select_lowering_strategy.mlir | 6 +- .../Flow/Transforms/AnnotateDispatches.cpp | 12 +-- .../Transforms/InitializeEmptyTensors.cpp | 2 +- .../Transforms/test/annotate_dispatches.mlir | 8 +- .../DispatchCreation/FormDispatchRegions.cpp | 20 ++-- .../DispatchCreation/FormScalarDispatches.cpp | 2 +- .../DispatchCreation/FusionPreprocessing.cpp | 2 +- .../DispatchCreation/SinkReshapes.cpp | 2 +- .../TensorPadToTensorInsertSlice.cpp | 2 +- .../test/collapse_dimensions.mlir | 6 +- ...spatch_region_formation_preprocessing.mlir | 4 +- .../test/form_dispatch_regions.mlir | 24 ++--- .../ExternalInterfaces/UtilExternalModels.cpp | 8 +- .../DataLayoutPropagation.cpp | 4 +- .../GlobalLoopInvariantCodeMotion.cpp | 3 +- .../GlobalOptimization/SimplifyPackUnpack.cpp | 4 +- .../test/data_layout_propagation.mlir | 8 +- .../global_loop_invariant_code_motion.mlir | 32 +++---- .../test/hoist_into_globals.mlir | 2 +- .../Common/ConvertConvToChannelsLast.cpp | 36 +++---- .../Common/test/conv_to_channels_last.mlir | 14 +-- 
.../docs/community/blog/posts/microkernels.md | 8 +- ...ranspose_1x9_into_2x4x8x4_issue_12546.mlir | 2 +- tests/e2e/tensor_ops/pack.mlir | 20 ++-- .../tensor_ops/pack_dynamic_inner_tiles.mlir | 16 ++-- tests/e2e/tensor_ops/pack_i8.mlir | 8 +- tests/e2e/tensor_ops/unpack.mlir | 34 +++---- 71 files changed, 463 insertions(+), 458 deletions(-) diff --git a/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir b/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir index 535ecd75b501..c7bd654921de 100644 --- a/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir +++ b/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir @@ -19,5 +19,5 @@ module attributes {hal.device.targets = [#device_target_llvm_cpu]} { } } // CHECK-LABEL: util.func public @lhs_encoding -// CHECK: tensor.pack -// CHECK: tensor.unpack +// CHECK: linalg.pack +// CHECK: linalg.unpack diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp index 8485d70edecd..889dbe64de5b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp @@ -263,7 +263,7 @@ matchDAGForUKernel(RewriterBase &rewriter, linalg::Mmt4DOp op, } static FailureOr -matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op, +matchDAGForUKernel(RewriterBase &rewriter, linalg::PackOp op, bool /*skipIntermediateRoundings*/) { auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op); const char ukernelName[] = "pack"; @@ -386,7 +386,7 @@ matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op, } static FailureOr -matchDAGForUKernel(RewriterBase &rewriter, tensor::UnPackOp op, +matchDAGForUKernel(RewriterBase &rewriter, linalg::UnPackOp op, bool /*skipIntermediateRoundings*/) { auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op); const char ukernelName[] = "unpack"; @@ -616,8 +616,8 @@ void CPULowerToUKernelsPass::runOnOperation() { // these ops. auto allTargets = [](auto target) { return true; }; patterns.insert, - LowerToUKernelPattern, - LowerToUKernelPattern>( + LowerToUKernelPattern, + LowerToUKernelPattern>( context, allTargets, skipIntermediateRoundings); // These patterns are inherently specific to the VMVX backend. 
patterns.insert>( diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp index 4c0d3e7b7d73..3e547f4ee5c5 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp @@ -46,7 +46,7 @@ static void tileBatchDimsForBatchMmt4dOp(RewriterBase &rewriter, static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter, FunctionOpInterface funcOp) { - funcOp.walk([&](tensor::PackOp packOp) { + funcOp.walk([&](linalg::PackOp packOp) { if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) { return; } @@ -81,7 +81,7 @@ static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter, static void tileNonPackedDimsFor5DPUnpackOps(RewriterBase &rewriter, FunctionOpInterface funcOp) { - funcOp.walk([&](tensor::UnPackOp unpackOp) { + funcOp.walk([&](linalg::UnPackOp unpackOp) { if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) { return; } @@ -251,10 +251,10 @@ struct ConvertBatchMmt4DtoMmt4DPattern } }; -struct Convert3DPackto2DPackPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct Convert3DPackto2DPackPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) { return failure(); @@ -309,7 +309,7 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern { auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, packOp.getDest(), reducedDestType); - auto newPackOp = rewriter.create( + auto newPackOp = rewriter.create( loc, reducedSrc, reducedDest, newInnerDimsPos, packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPerm); @@ -321,10 +321,10 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern { }; struct Convert5DUnPackto4DUnPackPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, + LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override { if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) { return failure(); @@ -387,7 +387,7 @@ struct Convert5DUnPackto4DUnPackPattern auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, unpackOp.getDest(), reducedDestType); - auto newUnpackOp = rewriter.create( + auto newUnpackOp = rewriter.create( loc, reducedSrc, reducedDest, newInnerDimsPos, unpackOp.getMixedTiles(), newOuterDimsPerm); @@ -436,8 +436,8 @@ void CPUPrepareUkernelsPass::runOnOperation() { tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx); tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx); tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx); - tensor::PackOp::getCanonicalizationPatterns(patterns, ctx); - tensor::UnPackOp::getCanonicalizationPatterns(patterns, ctx); + linalg::PackOp::getCanonicalizationPatterns(patterns, ctx); + linalg::UnPackOp::getCanonicalizationPatterns(patterns, ctx); tensor::CastOp::getCanonicalizationPatterns(patterns, ctx); tensor::populateFoldTensorEmptyPatterns(patterns); if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) { diff --git 
a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir index d5d4001bef9b..c58fbdaecfa6 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir @@ -287,7 +287,7 @@ func.func @mmt4d_bf16bf16f32(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor, %arg2 : i8) -> tensor attributes { hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "all", target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -315,7 +315,7 @@ func.func @pack_i8i8_x86(%arg0 : tensor, %arg1 : tensor, %ar func.func @pack_i8i8(%arg0 : tensor, %arg1 : tensor, %arg2 : i8) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -344,7 +344,7 @@ func.func @pack_i8i8(%arg0 : tensor, %arg1 : tensor, %arg2 : func.func @pack_f16f16(%arg0 : tensor, %arg1 : tensor, %arg2 : f16) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -373,7 +373,7 @@ func.func @pack_f16f16(%arg0 : tensor, %arg1 : tensor, %ar func.func @pack_bf16bf16(%arg0 : tensor, %arg1 : tensor, %arg2 : bf16) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -401,7 +401,7 @@ func.func @pack_bf16bf16(%arg0 : tensor, %arg1 : tensor, func.func @pack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : tensor, %arg2 : i32) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -430,19 +430,19 @@ func.func @pack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor, %arg2 : f32) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : f32) 
outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } // ----- -// Check that tensor.pack is not lowered to a microkernel by default - it should +// Check that linalg.pack is not lowered to a microkernel by default - it should // only be on VMVX. // CHECK: func @unpack_f16f16_default -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_f16f16_default(%arg0 : tensor, %arg1 : tensor) -> tensor { - %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -468,7 +468,7 @@ func.func @unpack_f16f16_default(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -494,7 +494,7 @@ func.func @unpack_f16f16(%arg0 : tensor, %arg1 : tensor) - func.func @unpack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : tensor) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -520,7 +520,7 @@ func.func @unpack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : te func.func @unpack_f32f32_transpose_inner_and_outer(%arg0 : tensor, %arg1 : tensor) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir index 199596d99833..7596d7b96517 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir @@ -158,7 +158,7 @@ func.func @pack_without_outer_dims_perm(%arg0: tensor<1x16384x512xbf16>, %arg1: hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "pack", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %cst = arith.constant 0.000000e+00 : bf16 - %pack = tensor.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> + %pack = linalg.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> return %pack : tensor<1x1024x256x16x2xbf16> } // CHECK: func.func @pack_without_outer_dims_perm @@ -168,7 +168,7 @@ func.func @pack_without_outer_dims_perm(%arg0: tensor<1x16384x512xbf16>, %arg1: // CHECK-SAME: tensor<1x16384x512xbf16> to tensor<16384x512xbf16> // CHECK: %[[DEST_SLICE:.+]] = tensor.extract_slice %[[DEST]] // CHECK-SAME: tensor<1x1024x256x16x2xbf16> to 
tensor<1024x256x16x2xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC_SLICE]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC_SLICE]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [16, 2] // CHECK-SAME: into %[[DEST_SLICE]] @@ -178,7 +178,7 @@ func.func @pack_with_outer_dims_perm(%arg0: tensor<484x16x64xbf16>, %arg1: tenso hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "pack", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %cst = arith.constant 0.000000e+00 : bf16 - %pack = tensor.pack %arg0 padding_value(%cst : bf16) outer_dims_perm = [2, 0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<484x16x64xbf16> -> tensor<64x31x8x16x2xbf16> + %pack = linalg.pack %arg0 padding_value(%cst : bf16) outer_dims_perm = [2, 0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<484x16x64xbf16> -> tensor<64x31x8x16x2xbf16> return %pack : tensor<64x31x8x16x2xbf16> } // CHECK: func.func @pack_with_outer_dims_perm @@ -190,7 +190,7 @@ func.func @pack_with_outer_dims_perm(%arg0: tensor<484x16x64xbf16>, %arg1: tenso // CHECK-SAME: tensor<484x16x64xbf16> to tensor<484x16xbf16> // CHECK: %[[DEST_SLICE:.+]] = tensor.extract_slice %[[ITER]] // CHECK-SAME: tensor<64x31x8x16x2xbf16> to tensor<31x8x16x2xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC_SLICE]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC_SLICE]] // CHECK-SAME: padding_value(%[[PAD_VAL]] : bf16) // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] // CHECK-SAME: into %[[DEST_SLICE]] @@ -202,11 +202,11 @@ func.func @do_not_decompose_pack(%arg0: tensor<1x16384x512xbf16>, %arg1: tensor< hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %cst = arith.constant 0.000000e+00 : bf16 - %pack = tensor.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> + %pack = linalg.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> return %pack : tensor<1x1024x256x16x2xbf16> } // CHECK-LABEL: func.func @do_not_decompose_pack -// CHECK: tensor.pack {{.+}} : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> +// CHECK: linalg.pack {{.+}} : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> // ----- @@ -214,7 +214,7 @@ func.func @unpack_without_transpose(%arg0: tensor<1828x8x64x16x16xf32>) -> tenso hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "unpack", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %6 = tensor.empty() : tensor<1828x128x1024xf32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 16] @@ -233,7 +233,7 @@ func.func @unpack_without_transpose(%arg0: tensor<1828x8x64x16x16xf32>) -> tenso // CHECK-SAME: : tensor<1828x8x64x16x16xf32> to tensor<8x64x16x16xf32> // CHECK: %[[DEST_SLICE:.*]] = tensor.extract_slice %[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 1024] [1, 1, 1] // CHECK-SAME: : tensor<1828x128x1024xf32> to tensor<128x1024xf32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[SRC_SLICE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[SRC_SLICE]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] // CHECK-SAME: into %[[DEST_SLICE]] : tensor<8x64x16x16xf32> -> tensor<128x1024xf32> // CHECK: %[[NEW_ITER_ARG:.*]] = tensor.insert_slice %[[UNPACK]] into 
%[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 1024] [1, 1, 1] @@ -250,7 +250,7 @@ func.func @unpack_outer_dim_transpose(%arg0: tensor<4x8x29241x16x16xf32>) -> ten } { %cst = arith.constant 0.000000e+00 : bf16 %4 = tensor.empty() : tensor<29241x128x64xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [2, 1, 0] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %4 : tensor<4x8x29241x16x16xf32> -> tensor<29241x128x64xf32> + %unpack = linalg.unpack %arg0 outer_dims_perm = [2, 1, 0] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %4 : tensor<4x8x29241x16x16xf32> -> tensor<29241x128x64xf32> return %unpack : tensor<29241x128x64xf32> } // CHECK-LABEL: func.func @unpack_outer_dim_transpose( @@ -265,7 +265,7 @@ func.func @unpack_outer_dim_transpose(%arg0: tensor<4x8x29241x16x16xf32>) -> ten // CHECK-SAME: : tensor<4x8x29241x16x16xf32> to tensor<4x8x16x16xf32> // CHECK: %[[DEST_SLICE:.*]] = tensor.extract_slice %[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 64] [1, 1, 1] // CHECK-SAME: : tensor<29241x128x64xf32> to tensor<128x64xf32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[SRC_SLICE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[SRC_SLICE]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] // CHECK-SAME: into %[[DEST_SLICE]] : tensor<4x8x16x16xf32> -> tensor<128x64xf32> // CHECK: %[[NEW_ITER_ARG:.*]] = tensor.insert_slice %[[UNPACK]] into %[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 64] [1, 1, 1] diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp index 4385ed1307dc..26b1d4298c1a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp @@ -463,13 +463,13 @@ static LogicalResult adaptComputeConsumerToAvoidStackAllocation( } /// Replaces a tensor.empty op with bufferization.alloc_tensor op which is -/// created by tiling tensor.unpack op. It is intended because tiling unpack ops +/// created by tiling linalg.unpack op. It is intended because tiling unpack ops /// with non-perfect sizes needs extra elements. See the tiling implementation -/// of tensor.unpack op for more details. +/// of linalg.unpack op for more details. static LogicalResult replaceUnpackEmptyWithAllocTensor(OpBuilder &b, mlir::FunctionOpInterface funcOp) { - funcOp.walk([&](tensor::UnPackOp unpackOp) { + funcOp.walk([&](linalg::UnPackOp unpackOp) { if (!unpackOp->hasOneUse() || !isa(*(unpackOp->user_begin()))) { return; diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp index 7cdd7052e2a4..3e9a8d7c9ac3 100644 --- a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp @@ -42,16 +42,16 @@ namespace { // Shared rewrite patterns //===----------------------------------------------------------------------===// -/// A wrapper pattern that calls linalg::lowerPack on tensor::PackOp. It lowers -/// a tensor.pack op to tensor.pad + tensor.expand_shape + linalg.transpose ops. -struct LowerPackPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +/// A wrapper pattern that calls linalg::lowerPack on linalg::PackOp. It lowers +/// a linalg.pack op to tensor.pad + tensor.expand_shape + linalg.transpose ops. 
+struct LowerPackPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; explicit LowerPackPattern(MLIRContext *context, std::optional controlFn) : OpRewritePattern(context), controlFn(controlFn) {} - LogicalResult matchAndRewrite(tensor::PackOp op, + LogicalResult matchAndRewrite(linalg::PackOp op, PatternRewriter &rewriter) const override { if (controlFn && failed(controlFn.value()(op))) { return failure(); } @@ -69,17 +69,17 @@ struct LowerPackPattern : public OpRewritePattern { std::optional controlFn; }; -/// A wrapper pattern that calls linalg::lowerUnPack on tensor::UnPackOp. It -/// lowers a tensor.unpack op to tensor.empty + linalg.transpose + +/// A wrapper pattern that calls linalg::lowerUnPack on linalg::UnPackOp. It +/// lowers a linalg.unpack op to tensor.empty + linalg.transpose + /// tensor.collapse_shape + tensor.extract_slice ops. -struct LowerUnPackPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct LowerUnPackPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; explicit LowerUnPackPattern(MLIRContext *context, std::optional controlFn) : OpRewritePattern(context), controlFn(controlFn) {} - LogicalResult matchAndRewrite(tensor::UnPackOp op, + LogicalResult matchAndRewrite(linalg::UnPackOp op, PatternRewriter &rewriter) const override { if (controlFn && failed(controlFn.value()(op))) { return failure(); } @@ -132,7 +132,7 @@ static LogicalResult commonRunOnOperation( } } - // TODO(hanchung): Below is a fallback solution for tensor.pack/unpack + // TODO(hanchung): Below is a fallback solution for linalg.pack/unpack // decomposition. They will be retired after lowerPack and lowerUnPack handle // all the cases. @@ -142,7 +142,7 @@ static LogicalResult commonRunOnOperation( auto packOptions = scf::SCFTileAndFuseOptions().setTilingOptions( scf::SCFTilingOptions().setTileSizeComputationFunction( [](OpBuilder &builder, Operation *op) -> SmallVector { - auto packOp = cast(op); + auto packOp = cast(op); // Do nothing if any of inner tile sizes is dynamic.
if (llvm::any_of(packOp.getMixedTiles(), llvm::IsaPred)) { @@ -155,7 +155,7 @@ static LogicalResult commonRunOnOperation( return tileSizes; })); { - WalkResult status = funcOp->walk([&](tensor::PackOp op) { + WalkResult status = funcOp->walk([&](linalg::PackOp op) { if (controlFn && failed(controlFn.value()(op))) { return WalkResult::advance(); } @@ -176,7 +176,7 @@ static LogicalResult commonRunOnOperation( auto unpackTilingOptions = scf::SCFTilingOptions().setTileSizeComputationFunction( [](OpBuilder &builder, Operation *op) { - auto unpackOp = cast(op); + auto unpackOp = cast(op); int numLoops = unpackOp.getDestRank(); auto dimAndTileMapping = unpackOp.getDimAndTileMapping(); SmallVector tileSizes; @@ -190,7 +190,7 @@ static LogicalResult commonRunOnOperation( return tileSizes; }); { - WalkResult status = funcOp->walk([&](tensor::UnPackOp op) { + WalkResult status = funcOp->walk([&](linalg::UnPackOp op) { if (controlFn && failed(controlFn.value()(op))) { return WalkResult::advance(); } @@ -299,12 +299,12 @@ static bool hasPadding(Operation *op) { } return false; }; - auto packOp = dyn_cast(op); + auto packOp = dyn_cast(op); if (packOp && needsPad(packOp.getSourceType(), packOp.getInnerDimsPos(), packOp.getStaticInnerTiles())) { return true; } - auto unPackOp = dyn_cast(op); + auto unPackOp = dyn_cast(op); if (unPackOp && needsPad(unPackOp.getDestType(), unPackOp.getInnerDimsPos(), unPackOp.getStaticInnerTiles())) { return true; @@ -320,7 +320,7 @@ static bool hasPadding(Operation *op) { /// 3. If the op is an UnPackOp, then all of its consumers must be dispatch /// tensor stores. static LogicalResult isUnpaddedAndAtBoundary(Operation *op) { - if (!isa(op) && !isa(op)) { + if (!isa(op) && !isa(op)) { return failure(); } if (hasPadding(op)) { @@ -329,13 +329,13 @@ static LogicalResult isUnpaddedAndAtBoundary(Operation *op) { // If the producer is a dispatch tensor load, then the `op` is decomposable // if it is a PackOp. - if (isa(op) && + if (isa(op) && op->getOperand(0).getDefiningOp()) { return success(); } // If all consumers are dispatch tensor stores, then the `op` is decomposable // if it is an UnPackOp. 
- if (isa(op) && + if (isa(op) && llvm::all_of(op->getUsers(), [&](Operation *user) { return isa(user); })) { diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp index ee47168b853b..5a17dec7fe05 100644 --- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp @@ -36,7 +36,7 @@ MaterializeEncodingTypeConverter::MaterializeEncodingTypeConverter( if (IREE::Codegen::isIdentityLayout(encodingInfo)) { return IREE::Encoding::dropEncoding(type); } - auto packedType = cast(tensor::PackOp::inferPackedType( + auto packedType = cast(linalg::PackOp::inferPackedType( type, encodingInfo.innerTileSizes, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm)); diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir index 76a30902610f..f0df8398f11e 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir @@ -527,7 +527,7 @@ func.func @fuse_imperfectly_aligned_unpack(%arg0: tensor<5x31xf16>, %arg1: index %c128 = arith.constant 128 : index %c0 = arith.constant 0 : index %0 = tensor.empty() : tensor<128xf16> - %unpack = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [31] into %0 : tensor<5x31xf16> -> tensor<128xf16> + %unpack = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [31] into %0 : tensor<5x31xf16> -> tensor<128xf16> %1 = scf.forall (%arg2) in (2) shared_outs(%arg3 = %0) -> (tensor<128xf16>) { %2 = affine.apply #map(%arg2) %extracted_slice = tensor.extract_slice %unpack[%2] [64] [1] : tensor<128xf16> to tensor<64xf16> @@ -542,7 +542,7 @@ func.func @fuse_imperfectly_aligned_unpack(%arg0: tensor<5x31xf16>, %arg1: index // CHECK-LABEL: func @fuse_imperfectly_aligned_unpack // CHECK: scf.forall -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.copy // CHECK: scf.forall.in_parallel // CHECK: return diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir index a14cd543091b..a71c2e9f06f1 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir @@ -12,9 +12,9 @@ module { // CHECK-SAME: %[[A:[A-Za-z0-9]+]]: tensor<64x64xf16> // CHECK-SAME: %[[B:[A-Za-z0-9]+]]: tensor<64x64xf16> // CHECK-SAME: %[[C:[A-Za-z0-9]+]]: tensor<64x64xf32> -// CHECK-DAG: %[[A_PACK:.+]] = tensor.pack %[[A]] inner_dims_pos = [0, 1] inner_tiles = [32, 8] -// CHECK-DAG: %[[B_PACK:.+]] = tensor.pack %[[B]] inner_dims_pos = [1, 0] inner_tiles = [32, 8] -// CHECK-DAG: %[[C_PACK:.+]] = tensor.pack %[[C]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] +// CHECK-DAG: %[[A_PACK:.+]] = linalg.pack %[[A]] inner_dims_pos = [0, 1] inner_tiles = [32, 8] +// CHECK-DAG: %[[B_PACK:.+]] = linalg.pack %[[B]] inner_dims_pos = [1, 0] inner_tiles = [32, 8] +// CHECK-DAG: %[[C_PACK:.+]] = linalg.pack %[[C]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] // CHECK: iree_gpu.multi_mma %[[A_PACK]], %[[B_PACK]], %[[C_PACK]] // CHECK-SAME: indexing_maps = // CHECK-SAME: affine_map<(d0, d1, d2) -> (d0, d2)> diff --git a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp 
b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp index adb1798dc3e7..2caa7d4910e1 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp @@ -56,7 +56,7 @@ getVectorSizes(Operation *op, bool useConfiguredVectorSizes) { scalableFlags = result->vectorScalableFlags; } }) - .Case([&](auto op) { + .Case([&](auto op) { std::optional result = inferSizesFromIR(op); if (result) { vectorSizes = result->vectorSizes; @@ -122,7 +122,7 @@ void GenericVectorizationPass::runOnOperation() { isa(op)) { candidates.push_back(op); } else if (enableVectorMasking && - isa(op)) { + isa(op)) { candidates.push_back(op); } }); diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp index 9521a8f37d64..7ac4fdd13032 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp @@ -18,6 +18,7 @@ #include "iree/compiler/Dialect/HAL/Analysis/DeviceAnalysis.h" #include "iree/compiler/Dialect/HAL/IR/HALTypes.h" #include "iree/compiler/Dialect/Stream/Analysis/Affinity.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/MemRef/Transforms/Transforms.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Interfaces/FunctionInterfaces.h" @@ -107,7 +108,7 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp, { RewritePatternSet patterns(ctx); tensor::CastOp::getCanonicalizationPatterns(patterns, ctx); - tensor::populateFoldIntoPackAndUnpackPatterns(patterns); + linalg::populateFoldIntoPackAndUnpackPatterns(patterns); memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns); if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) { funcOp.emitOpError("folding patterns failed"); diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp index 8602a23ea767..a6ea6deb6e5a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp @@ -137,13 +137,13 @@ FailureOr lowerSetEncodingOpToPackOp( loc, rewriter.getZeroAttr(resultType.getElementType())); SmallVector sourceDims = tensor::getMixedSizes(rewriter, loc, source); - SmallVector resultDims = tensor::PackOp::getResultShape( + SmallVector resultDims = linalg::PackOp::getResultShape( rewriter, loc, sourceDims, *innerTileSizesOfr, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm); auto emptyOp = rewriter.create(loc, resultDims, resultType.getElementType()); return rewriter - .create(loc, source, emptyOp, encodingInfo.innerDimsPos, + .create(loc, source, emptyOp, encodingInfo.innerDimsPos, *innerTileSizesOfr, paddingValue, encodingInfo.outerDimsPerm) .getResult(); @@ -176,7 +176,7 @@ FailureOr lowerUnsetEncodingToUnpackOp( encodingOp, "failed to generate runtime tile size query"); } return rewriter - .create(loc, packedValue, emptyOp, + .create(loc, packedValue, emptyOp, encodingInfo.innerDimsPos, *innerTileSizesOfr, encodingInfo.outerDimsPerm) .getResult(); @@ -209,7 +209,7 @@ lowerOpWithEncoding(RewriterBase &rewriter, tensor::EmptyOp emptyOp, SmallVector sourceDims = emptyOp.getMixedSizes(); (void)foldDynamicIndexList(sourceDims); - SmallVector newShape = tensor::PackOp::getResultShape( + SmallVector newShape = 
linalg::PackOp::getResultShape( rewriter, loc, sourceDims, *innerTileSizesOfr, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm); newShape = getSwizzledShape(newShape, encodingInfo); @@ -380,7 +380,7 @@ static FailureOr> getPackedDimsForDispatchTensor( return failure(); } SmallVector convertedTargetShape = - tensor::PackOp::getResultShape(builder, loc, targetShape, *innerTileSizes, + linalg::PackOp::getResultShape(builder, loc, targetShape, *innerTileSizes, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm); return getSwizzledShape(convertedTargetShape, encodingInfo); diff --git a/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp b/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp index 875466db5aba..c8d211b29403 100644 --- a/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp @@ -38,12 +38,12 @@ inferWorkgroupTileMultiplesFromPackUnPack( std::optional> initialPackedMultiples = std::nullopt, std::optional> initialUnPackedMultiples = std::nullopt) { - static_assert(llvm::is_one_of::value); + static_assert(llvm::is_one_of::value); LDBG("Inferring workgroup tile size multiples from " << op->getName() << ":\n" << op); // Initialize the list of multiples for the packed and unpack inputs. - int64_t unPackedRank = (std::is_same::value) + int64_t unPackedRank = (std::is_same::value) ? op.getSourceRank() : op.getDestRank(); SmallVector innerTiles = op.getStaticTiles(); @@ -103,11 +103,11 @@ inferWorkgroupTileMultiplesFromPackUnPack( } SmallVector srcMultiples = - (std::is_same::value) + (std::is_same::value) ? unPackedMultiples : packedMultiples; SmallVector destMultiples = - (std::is_same::value) + (std::is_same::value) ? packedMultiples : unPackedMultiples; LLVM_DEBUG({ @@ -267,14 +267,14 @@ static SmallVector inferResultWorkgroupTileMultiples(OpResult result) { }); return resultMultiples; }) - .Case([&](tensor::PackOp packOp) { + .Case([&](linalg::PackOp packOp) { SmallVector srcMultiples = getOperandMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( packOp, /*initialPackedMultiples=*/std::nullopt, /*initialUnPackedMultiples=*/srcMultiples) .second; }) - .Case([&](tensor::UnPackOp unPackOp) { + .Case([&](linalg::UnPackOp unPackOp) { SmallVector srcMultiples = getOperandMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( unPackOp, /*initialPackedMultiples=*/srcMultiples, @@ -340,14 +340,14 @@ static SmallVector inferUseWorkgroupTileMultiples(OpOperand *use) { }); return srcMultiples; }) - .Case([&](tensor::PackOp packOp) { + .Case([&](linalg::PackOp packOp) { SmallVector destMultiples = getResultMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( packOp, /*initialPackedMultiples=*/destMultiples, /*initialUnPackedMultiples=*/std::nullopt) .first; }) - .Case([&](tensor::UnPackOp unpackOp) { + .Case([&](linalg::UnPackOp unpackOp) { SmallVector destMultiples = getResultMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( unpackOp, /*initialPackedMultiples=*/std::nullopt, diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir index d4c2018ec24c..791e0fefed5c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir @@ -795,7 +795,7 @@ func.func @pack() { 
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = tensor.empty() : tensor<2x2x2x2xi32> - %pack = tensor.pack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<4x4xi32> -> tensor<2x2x2x2xi32> + %pack = linalg.pack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<4x4xi32> -> tensor<2x2x2x2xi32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : tensor<2x2x2x2xi32> -> !flow.dispatch.tensor> return } @@ -804,7 +804,7 @@ func.func @pack() { // CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]] -// CHECK: tensor.pack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] +// CHECK: linalg.pack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- @@ -818,7 +818,7 @@ func.func @unpack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x2x2xi32> %3 = tensor.empty() : tensor<4x4xi32> - %4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> + %4 = linalg.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : tensor<4x4xi32> -> !flow.dispatch.tensor> return } @@ -827,7 +827,7 @@ func.func @unpack() { // CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]] -// CHECK: tensor.unpack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] +// CHECK: linalg.unpack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- @@ -864,7 +864,7 @@ func.func @non_perfect_tiling_unpack() { %16 = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%arg1)[%0#1] %17 = flow.dispatch.tensor.load %3, offsets = [%15, %16, 0, 0], sizes = [%c1, %c1, %0#0, %0#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} -> tensor %18 = tensor.empty(%0#0, %0#1) : tensor - %19 = tensor.unpack %17 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %18 : tensor -> tensor + %19 = linalg.unpack %17 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %18 : tensor -> tensor %extracted_slice = tensor.extract_slice %19[%13, %14] [1, 1] [1, 1] : tensor to tensor<1x1xi32> %cast = tensor.cast %extracted_slice : tensor<1x1xi32> to tensor flow.dispatch.tensor.store %cast, %4, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [1, 1] : tensor -> !flow.dispatch.tensor> @@ -874,7 +874,7 @@ func.func @non_perfect_tiling_unpack() { } // CHECK-LABEL: func.func @non_perfect_tiling_unpack // CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK-SAME: into %[[ALLOC]] // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[UNPACK]] 
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir index 6be2a7d96920..ea515a880caf 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir @@ -10,13 +10,13 @@ func.func @pack_at_source() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> %barrier = util.optimization_barrier %pack : tensor<4x4x4x4xf32> flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @pack_at_source -// CHECK-NOT: tensor.pack +// CHECK-NOT: linalg.pack // ----- @@ -30,13 +30,13 @@ func.func @unpack_at_source() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> %barrier = util.optimization_barrier %unpack : tensor<16x16xf32> flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @unpack_at_source -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -51,12 +51,12 @@ func.func @pack_at_dest() { %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %barrier = util.optimization_barrier %src : tensor<16x16xf32> - %pack = tensor.pack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @pack_at_dest -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -71,12 +71,12 @@ func.func @unpack_at_dest() { %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 
4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %barrier = util.optimization_barrier %src : tensor<4x4x4x4xf32> - %unpack = tensor.unpack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @unpack_at_dest -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // ----- @@ -91,12 +91,12 @@ func.func @padded_pack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [15, 15], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x15xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> - %pack = tensor.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<15x15xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<15x15xf32> -> tensor<4x4x4x4xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @padded_pack -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -110,12 +110,12 @@ func.func @padded_unpack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [15, 15], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x15xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<15x15xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<15x15xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [15, 15], strides = [1, 1] : tensor<15x15xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @padded_unpack -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -134,12 +134,12 @@ func.func @dynamic_pack() { %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3, %3} %src = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %2], strides = [1, 1] : !flow.dispatch.tensor>{%2, %2} -> tensor %dest = flow.dispatch.tensor.load %5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%3, %3} -> tensor - %pack = tensor.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor + %pack = linalg.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor flow.dispatch.tensor.store %pack, 
%5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : tensor -> !flow.dispatch.tensor>{%3, %3} return } // CHECK-LABEL: func.func @dynamic_pack -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -157,12 +157,12 @@ func.func @dynamic_unpack() { %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3, %3} %src = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%2, %2, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%2, %2} -> tensor %dest = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : !flow.dispatch.tensor>{%3, %3} -> tensor - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor flow.dispatch.tensor.store %unpack, %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%3, %3} return } // CHECK-LABEL: func.func @dynamic_unpack -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -176,12 +176,12 @@ func.func @load_non_full_slice() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @load_non_full_slice -// CHECK-NOT: tensor.pack +// CHECK-NOT: linalg.pack // ----- @@ -195,12 +195,12 @@ func.func @store_non_full_slice() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @store_non_full_slice -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // ----- @@ -216,13 +216,13 @@ func.func @multi_use_unpack_fold() { %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides 
= [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> flow.dispatch.tensor.store %unpack, %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @multi_use_unpack_fold -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // ----- @@ -239,11 +239,11 @@ func.func @multi_use_unpack_no_fold() { %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest2 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> %copy = linalg.copy ins(%unpack : tensor<16x16xf32>) outs(%dest2 : tensor<16x16xf32>) -> tensor<16x16xf32> flow.dispatch.tensor.store %copy, %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @multi_use_unpack_no_fold -// CHECK: tensor.unpack +// CHECK: linalg.unpack diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir index 9f51fe5ad72a..8703fe66ed79 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir @@ -2,7 +2,7 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-decompose-pack-unpack-ops{use-only-reshapes=true}))" --split-input-file %s | FileCheck %s -check-prefixes=CHECK-ALL,CHECK-RESHAPE func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x1x1x8x32xf32>) -> tensor<1x1x1x1x8x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> return %0 : tensor<1x1x1x1x8x32xf32> } // CHECK-ALL-LABEL: func.func @simple_KCRS_to_KCRSsr @@ -21,7 +21,7 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x // ----- func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> 
tensor<1x1x8x2xf32> return %0 : tensor<1x1x8x2xf32> } // CHECK-ALL-LABEL: func.func @simple_pad_and_pack @@ -41,7 +41,7 @@ func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2x // ----- func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{ - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-ALL-LABEL: func.func @simple_NC_to_CNnc @@ -58,7 +58,7 @@ func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32 // ----- func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8x32xf32>) -> tensor<1x1x4x8x8x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> return %0 : tensor<1x1x4x8x8x32xf32> } // CHECK-ALL: func.func @KCRS_to_KCRSsr @@ -74,7 +74,7 @@ func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8 // ----- func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %arg2: f32) -> tensor<2x8x8x2xf32> { - %0 = tensor.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> + %0 = linalg.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> return %0 : tensor<2x8x8x2xf32> } // CHECK-ALL: func.func @pad_and_pack @@ -94,7 +94,7 @@ func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %a // ----- func.func @KC_to_CKck(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> return %0 : tensor<32x4x32x8xf32> } // CHECK-ALL: func.func @KC_to_CKck @@ -110,7 +110,7 @@ func.func @KC_to_CKck(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) // ----- func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-ALL-LABEL: func.func @simple_KCRSsr_to_KCRS @@ -132,7 +132,7 @@ func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor< // ----- func.func @simple_unpack_and_extract_slice(%input: tensor<1x1x8x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> { - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> return %0 
: tensor<5x1xf32> } // CHECK-ALL-LABEL: func.func @simple_unpack_and_extract_slice @@ -152,7 +152,7 @@ func.func @simple_unpack_and_extract_slice(%input: tensor<1x1x8x2xf32>, %output: // ----- func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32>) -> tensor<32x8xf32>{ - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32> return %0 : tensor<32x8xf32> } // CHECK-ALL-LABEL: func.func @simple_CNnc_to_NC @@ -169,7 +169,7 @@ func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32 // ----- func.func @KCRSsr_to_KCRS(%arg0: tensor<13x12x4x8x8x32xf32>, %arg1: tensor<13x12x128x64xf32>) -> tensor<13x12x128x64xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<13x12x4x8x8x32xf32> -> tensor<13x12x128x64xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<13x12x4x8x8x32xf32> -> tensor<13x12x128x64xf32> return %0 : tensor<13x12x128x64xf32> } // CHECK-ALL: func.func @KCRSsr_to_KCRS @@ -189,7 +189,7 @@ func.func @KCRSsr_to_KCRS(%arg0: tensor<13x12x4x8x8x32xf32>, %arg1: tensor<13x12 // ----- func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13x15xf32>) -> tensor<13x15xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> return %0 : tensor<13x15xf32> } // CHECK-ALL: func.func @unpack_and_extract_slice @@ -210,7 +210,7 @@ func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13 // ----- func.func @CKck_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) -> tensor<128x256xf32> { - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> return %0 : tensor<128x256xf32> } // CHECK-ALL: func.func @CKck_to_KC @@ -224,7 +224,7 @@ func.func @CKck_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) // ----- func.func @pack_matmul_DYN_LHS(%src: tensor, %dest: tensor) -> tensor { - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %dest : tensor -> tensor + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %dest : tensor -> tensor return %pack : tensor } // CHECK-ALL-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * 16 - s1)> @@ -248,7 +248,7 @@ func.func @pack_matmul_DYN_LHS(%src: tensor, %dest: tensor, %dest: tensor) -> tensor { - %pack = tensor.pack %src outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %dest : tensor -> tensor + %pack = linalg.pack %src outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %dest : tensor -> tensor return %pack : tensor } // CHECK-ALL-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * 16 - s1)> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir b/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir index 
54a06697aa5b..4cdbcc8eb5fa 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir @@ -51,7 +51,7 @@ func.func @single_static_pack_infer_vector_size(%arg0: tensor<101x201xi8>, %arg1 %7 = affine.min #map5(%2, %arg2) %extracted_slice = tensor.extract_slice %arg0[%4, %6] [%5, %7] [1, 1] : tensor<101x201xi8> to tensor %extracted_slice_0 = tensor.extract_slice %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor<13x51x16x2xi8> to tensor - %pack = tensor.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_0 : tensor -> tensor + %pack = linalg.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_0 : tensor -> tensor %inserted_slice = tensor.insert_slice %pack into %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor into tensor<13x51x16x2xi8> scf.yield %inserted_slice : tensor<13x51x16x2xi8> } @@ -59,10 +59,10 @@ func.func @single_static_pack_infer_vector_size(%arg0: tensor<101x201xi8>, %arg1 } return %0 : tensor<13x51x16x2xi8> } -// Direct tensor.pack vectorization is only available with masking. +// Direct linalg.pack vectorization is only available with masking. // TODO: Support non-masking path. // CHECK-LABEL: func.func @single_static_pack_infer_vector_size -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-MASK: #[[$MAP0:.+]] = affine_map<(d0) -> (-d0 + 13, 2)> // CHECK-MASK: #[[$MAP1:.+]] = affine_map<(d0) -> (-d0 + 51, 4)> @@ -117,7 +117,7 @@ func.func @single_dynamic_pack_infer_vector_size(%arg0: tensor, %arg1: t %7 = affine.min #map5(%2, %arg2)[%dim_2] %extracted_slice = tensor.extract_slice %arg0[%4, %6] [%5, %7] [1, 1] : tensor to tensor %extracted_slice_3 = tensor.extract_slice %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor to tensor - %pack = tensor.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_3 : tensor -> tensor + %pack = linalg.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_3 : tensor -> tensor %inserted_slice = tensor.insert_slice %pack into %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor into tensor scf.yield %inserted_slice : tensor } @@ -125,10 +125,10 @@ func.func @single_dynamic_pack_infer_vector_size(%arg0: tensor, %arg1: t } return %0 : tensor } -// Direct tensor.pack vectorization is only available with masking. +// Direct linalg.pack vectorization is only available with masking. // TODO: Support non-masking path. 
// CHECK-LABEL: func.func @single_dynamic_pack_infer_vector_size -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-MASK: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> // CHECK-MASK: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> @@ -194,7 +194,7 @@ func.func @generic_pack_infer_vector_size(%arg0: tensor) -> tensor linalg.yield %13 : bf16 } -> tensor<2x?x?xbf16> %extracted_slice_1 = tensor.extract_slice %arg6[%arg1, %arg3, %arg5, 0, 0] [2, %6, %7, 16, 2] [1, 1, 1, 1, 1] : tensor<32x?x64x16x2xbf16> to tensor<2x?x?x16x2xbf16> - %pack = tensor.pack %12 padding_value(%cst : bf16) outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %extracted_slice_1 : tensor<2x?x?xbf16> -> tensor<2x?x?x16x2xbf16> + %pack = linalg.pack %12 padding_value(%cst : bf16) outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %extracted_slice_1 : tensor<2x?x?xbf16> -> tensor<2x?x?x16x2xbf16> %inserted_slice = tensor.insert_slice %pack into %arg6[%arg1, %arg3, %arg5, 0, 0] [2, %6, %7, 16, 2] [1, 1, 1, 1, 1] : tensor<2x?x?x16x2xbf16> into tensor<32x?x64x16x2xbf16> scf.yield %inserted_slice : tensor<32x?x64x16x2xbf16> } @@ -270,7 +270,7 @@ func.func @single_dynamic_unpack_infer_vector_size(%arg0: tensor, %6 = affine.apply #map3(%3) %extracted_slice = tensor.extract_slice %arg0[%4, %5, 0, 0] [1, %6, 16, 16] [1, 1, 1, 1] : tensor to tensor<1x?x16x16xf32> %extracted_slice_3 = tensor.extract_slice %arg5[%arg2, %arg4] [%2, %3] [1, 1] : tensor to tensor - %unpack = tensor.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_3 : tensor<1x?x16x16xf32> -> tensor + %unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_3 : tensor<1x?x16x16xf32> -> tensor %inserted_slice = tensor.insert_slice %unpack into %arg5[%arg2, %arg4] [%2, %3] [1, 1] : tensor into tensor scf.yield %inserted_slice : tensor } @@ -324,7 +324,7 @@ func.func @generic_unpack_infer_vector_size(%arg0: tensor, %arg1: %6 = affine.apply #map3(%3) %extracted_slice = tensor.extract_slice %arg0[%4, %5, 0, 0] [1, %6, 16, 16] [1, 1, 1, 1] : tensor to tensor<1x?x16x16xf32> %extracted_slice_1 = tensor.extract_slice %arg1[%arg3, %arg5] [%2, %3] [1, 1] : tensor to tensor - %unpack = tensor.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_1 : tensor<1x?x16x16xf32> -> tensor + %unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_1 : tensor<1x?x16x16xf32> -> tensor %extracted_slice_2 = tensor.extract_slice %arg6[%arg3, %arg5] [%2, %3] [1, 1] : tensor to tensor %7 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel"]} ins(%unpack : tensor) outs(%extracted_slice_2 : tensor) { ^bb0(%in: f32, %out: f32): diff --git a/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir index 405b58f0cf3a..e886f8185775 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir @@ -46,7 +46,7 @@ func.func @set_encoding_LHS_unroll8x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_LHS_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = 
tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 16] @@ -79,7 +79,7 @@ func.func @set_encoding_LHS_narrow_unroll1x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_LHS_narrow_unroll1x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [16, 16] @@ -122,7 +122,7 @@ func.func @set_encoding_LHS_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32() { return } // CHECK-LABEL: func.func @set_encoding_LHS_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 16] @@ -155,7 +155,7 @@ func.func @set_encoding_RHS_unroll8x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_RHS_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [128, 16] @@ -188,7 +188,7 @@ func.func @set_encoding_RHS_narrow_unroll8x1x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_RHS_narrow_unroll8x1x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [16, 16] @@ -221,7 +221,7 @@ func.func @set_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -260,7 +260,7 @@ func.func @unset_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32() { // CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4] // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]] // CHECK-SAME: : tensor<2x5x4x8x4x4x16x2xf32> into tensor<2x5x128x128xf32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -303,7 +303,7 @@ func.func @unset_encoding_ACC_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32() { // CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4] // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]] // CHECK-SAME: : tensor into tensor -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -453,7 +453,7 @@ func.func @set_encoding_LHS_unroll8x8x2_MFMA_I32_16x16x32_I8() { } // CHECK-LABEL: 
func.func @set_encoding_LHS_unroll8x8x2_MFMA_I32_16x16x32_I8 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : i8) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : i8) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 64] @@ -486,7 +486,7 @@ func.func @set_encoding_RHS_unroll8x8x2_MFMA_I32_16x16x32_I8() { } // CHECK-LABEL: func.func @set_encoding_RHS_unroll8x8x2_MFMA_I32_16x16x32_I8 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : i8) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : i8) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [128, 64] @@ -519,7 +519,7 @@ func.func @set_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8() { } // CHECK-LABEL: func.func @set_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : i32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : i32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -558,7 +558,7 @@ func.func @unset_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8() { // CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4] // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]] // CHECK-SAME: : tensor<2x5x4x8x4x4x16x2xi32> into tensor<2x5x128x128xi32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir index 4a13f0a08c42..f8c46e65f63b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir @@ -2497,7 +2497,7 @@ func.func @tensor_pack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x3x3xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> - %4 = tensor.pack %3 padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : tensor<4x4xi32> -> tensor<2x2x3x3xi32> + %4 = linalg.pack %3 padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : tensor<4x4xi32> -> tensor<2x2x3x3xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : tensor<2x2x3x3xi32> -> !flow.dispatch.tensor> return } @@ -2521,7 +2521,7 @@ func.func @tensor_unpack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x2x2xi32> - %4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : tensor<2x2x2x2xi32> -> 
tensor<4x4xi32> + %4 = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : tensor<4x4xi32> -> !flow.dispatch.tensor> return } @@ -2544,7 +2544,7 @@ func.func @tensor_unpack_fully_dynamic() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, %inner_d0, %inner_d0], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x?x?xi32> - %4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : tensor<2x2x?x?xi32> -> tensor<4x4xi32> + %4 = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : tensor<2x2x?x?xi32> -> tensor<4x4xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : tensor<4x4xi32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir index c18018465977..01b2c7b83b19 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir @@ -17,7 +17,7 @@ func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes return } // This tests that -// 1. The padding value is created for tensor.pack ops. +// 1. The padding value is created for linalg.pack ops. // 2. The inner tile sizes are less than or equal to values in round_dims_to. // We could choose 128 when it is a narrow matrix. 
// CHECK-LABEL: func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f @@ -26,7 +26,7 @@ func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes // CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan {{.+}} : !flow.dispatch.tensor> // CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor<1x1000x1x1xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [1, 1] @@ -60,7 +60,7 @@ func.func @set_encoding_7x7x7_matmul_LHS() attributes { // CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0], sizes = [7, 7], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<7x7xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x7x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] padding_value(%[[CST]] : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<7x7xf32> -> tensor<1x7x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] padding_value(%[[CST]] : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<7x7xf32> -> tensor<1x7x8x1xf32> // CHECK: flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [1, 7, 8, 1], strides = [1, 1, 1, 1] : tensor<1x7x8x1xf32> -> !flow.dispatch.tensor> // ----- @@ -91,7 +91,7 @@ func.func @set_encoding_128x80x32_batch_matmul_LHS() attributes { // CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x80x32xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x10x32x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x80x32xf32> -> tensor<128x10x32x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x80x32xf32> -> tensor<128x10x32x8x1xf32> // CHECK: flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]], offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 32, 8, 1], strides = [1, 1, 1, 1, 1] : tensor<128x10x32x8x1xf32> -> !flow.dispatch.tensor> // ----- @@ -124,7 +124,7 @@ func.func @set_encoding_128x32x320_batch_matmul_RHS() attributes { // CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x32x320xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x40x32x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x32x320xf32> -> tensor<128x40x32x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x32x320xf32> -> tensor<128x40x32x8x1xf32> // CHECK: 
flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]], offsets = [0, 0, 0, 0, 0], sizes = [128, 40, 32, 8, 1], strides = [1, 1, 1, 1, 1] : tensor<128x40x32x8x1xf32> -> !flow.dispatch.tensor> // ----- @@ -163,7 +163,7 @@ func.func @unset_encoding_128x80x320_batch_matmul_RESULT() attributes { // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 40, 8, 8], strides = [1, 1, 1, 1, 1] // CHECK: %[[EMPTY:.+]] = tensor.empty() -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[INPUT]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[INPUT]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] // CHECK-DAG: flow.dispatch.tensor.store %[[UNPACK]], %[[OUTPUT_BINDING]] @@ -203,8 +203,8 @@ func.func @pack_gemm_fill_dynamic(%arg0 : tensor, %arg1 : tensor // CHECK: %[[FILL:.+]] = linalg.fill @@ -212,7 +212,7 @@ func.func @pack_gemm_fill_dynamic(%arg0 : tensor, %arg1 : tensor, %ar // CHECK-SAME: %[[LHS:.+]]: tensor<32x1x128xi8>, %[[RHS:.+]]: tensor<32x128x11008xi8>) -> tensor<32x1x11008xi32> // CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<32x1x64x1x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 2] into %[[INIT_LHS_PACK]] : tensor<32x1x128xi8> -> tensor<32x1x64x1x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 2] into %[[INIT_LHS_PACK]] : tensor<32x1x128xi8> -> tensor<32x1x64x1x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<32x1x64x1x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<32x1x64x1x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<32x1x64x1x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<32x688x64x16x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<32x688x64x16x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<32x688x64x16x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<32x688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2105,7 +2105,7 @@ func.func @extend_batch_vecmat_explicit_unit_dim(%arg0: tensor<32x1x128xi8>, %ar // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[C0_I32]] : i32) outs(%[[INIT_FILL]] : tensor<32x1x688x1x16xi32>) -> tensor<32x1x688x1x16xi32> // CHECK: %[[MMT4D:.+]] = linalg.batch_mmt4d ins(%[[LHS_EXT]], %[[RHS_EXT]] : tensor<32x1x64x1x2xi32>, tensor<32x688x64x16x2xi32>) outs(%[[FILL]] : tensor<32x1x688x1x16xi32>) -> tensor<32x1x688x1x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = 
tensor.empty() : tensor<32x1x11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 16] into %[[INIT_UNPACK]] : tensor<32x1x688x1x16xi32> -> tensor<32x1x11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 16] into %[[INIT_UNPACK]] : tensor<32x1x688x1x16xi32> -> tensor<32x1x11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2293,14 +2293,14 @@ func.func @vecmat(%arg0: tensor<128xi8>, %arg1: tensor<128x11008xi8>) -> tensor< // CHECK-SAME: %[[LHS:.+]]: tensor<128xi8>, %[[RHS:.+]]: tensor<128x11008xi8>) -> tensor<11008xi32> // CHECK-DAG: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<64x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<64x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<64x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<64x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<688x64x16x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<128x11008xi8> -> tensor<688x64x16x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<128x11008xi8> -> tensor<688x64x16x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<688x64x16x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<688x64x16x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2313,7 +2313,7 @@ func.func @vecmat(%arg0: tensor<128xi8>, %arg1: tensor<128x11008xi8>) -> tensor< // CHECK: %[[MMT4D:.+]] = linalg.mmt4d ins(%[[EXPAND_LHS]], %[[RHS_EXT]] : tensor<1x64x1x2xi32>, tensor<688x64x16x2xi32>) outs(%[[FILL]] : tensor<1x688x1x16xi32>) -> tensor<1x688x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0, 1], [2, 3]] : tensor<1x688x1x16xi32> into tensor<688x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2355,14 +2355,14 @@ func.func @matvec(%arg0: tensor<11008x128xi8>, %arg1: tensor<128xi8>) -> tensor< // CHECK-SAME: %[[LHS:.+]]: 
tensor<11008x128xi8>, %[[RHS:.+]]: tensor<128xi8>) -> tensor<11008xi32> // CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<688x64x16x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<11008x128xi8> -> tensor<688x64x16x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<11008x128xi8> -> tensor<688x64x16x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<688x64x16x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<688x64x16x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<64x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<64x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<64x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<64x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2375,7 +2375,7 @@ func.func @matvec(%arg0: tensor<11008x128xi8>, %arg1: tensor<128xi8>) -> tensor< // CHECK: %[[MMT4D:.+]] = linalg.mmt4d ins(%[[EXPAND_RHS]], %[[LHS_EXT]] : tensor<1x64x1x2xi32>, tensor<688x64x16x2xi32>) outs(%[[FILL]] : tensor<1x688x1x16xi32>) -> tensor<1x688x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0, 1], [2, 3]] : tensor<1x688x1x16xi32> into tensor<688x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2418,14 +2418,14 @@ func.func @matvec_with_narrow_M(%arg0: tensor<15x128xi8>, %arg1: tensor<128xi8>) // CHECK-DAG: %[[C0_I8:.+]] = arith.constant 0 : i8 // CHECK-DAG: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<1x64x16x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] padding_value(%[[C0_I8]] : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<15x128xi8> -> tensor<1x64x16x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] padding_value(%[[C0_I8]] : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<15x128xi8> -> tensor<1x64x16x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() 
: tensor<1x64x16x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<1x64x16x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<1x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<64x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<64x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<64x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<64x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2438,7 +2438,7 @@ func.func @matvec_with_narrow_M(%arg0: tensor<15x128xi8>, %arg1: tensor<128xi8>) // CHECK: %[[MMT4D:.+]] = linalg.mmt4d ins(%[[EXPAND_RHS]], %[[LHS_EXT]] : tensor<1x64x1x2xi32>, tensor<1x64x16x2xi32>) outs(%[[FILL]] : tensor<1x1x1x16xi32>) -> tensor<1x1x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0, 1], [2, 3]] : tensor<1x1x1x16xi32> into tensor<1x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<15xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<1x16xi32> -> tensor<15xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<1x16xi32> -> tensor<15xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2480,14 +2480,14 @@ func.func @batch_vecmat(%arg0: tensor<32x128xi8>, %arg1: tensor<32x128x11008xi8> // CHECK-SAME: %[[LHS:.+]]: tensor<32x128xi8>, %[[RHS:.+]]: tensor<32x128x11008xi8>) -> tensor<32x11008xi32> // CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<32x64x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<32x128xi8> -> tensor<32x64x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<32x128xi8> -> tensor<32x64x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<32x64x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<32x64x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<32x64x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<32x688x64x16x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : 
tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<32x688x64x16x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<32x688x64x16x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<32x688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2500,7 +2500,7 @@ func.func @batch_vecmat(%arg0: tensor<32x128xi8>, %arg1: tensor<32x128x11008xi8> // CHECK: %[[MMT4D:.+]] = linalg.batch_mmt4d ins(%[[EXPAND_LHS]], %[[RHS_EXT]] : tensor<32x1x64x1x2xi32>, tensor<32x688x64x16x2xi32>) outs(%[[FILL]] : tensor<32x1x688x1x16xi32>) -> tensor<32x1x688x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0], [1, 2], [3, 4]] : tensor<32x1x688x1x16xi32> into tensor<32x688x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<32x11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<32x688x16xi32> -> tensor<32x11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<32x688x16xi32> -> tensor<32x11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2552,16 +2552,16 @@ func.func @matmul_transpose_a_f32f32f32(%arg0: tensor<256x128xf32>, %arg1: tenso // CHECK-LABEL: func.func @matmul_transpose_a_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<256x128xf32>, %[[RHS:.+]]: tensor<256x512xf32>, %[[RESULT:.+]]: tensor<128x512xf32>) -> tensor<128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<256x128xf32> -> tensor<16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<256x128xf32> -> tensor<16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<128x256x4x1xf32> -// CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<256x512xf32> -> tensor<128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<256x512xf32> -> tensor<128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> // CHECK: %[[MMT4D:.+]] = linalg.mmt4d // CHECK-SAME: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK-SAME: outs(%[[PACK_RES]] : // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<128x512xf32> -// CHECK: %[[UNPACK:.+]] = 
tensor.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> // CHECK: return %[[UNPACK]] // ----- @@ -2591,16 +2591,16 @@ func.func @matmul_transpose_b_f32f32f32(%arg0: tensor<128x256xf32>, %arg1: tenso // CHECK-LABEL: func.func @matmul_transpose_b_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<128x256xf32>, %[[RHS:.+]]: tensor<512x256xf32>, %[[RESULT:.+]]: tensor<128x512xf32>) -> tensor<128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<128x256xf32> -> tensor<16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<128x256xf32> -> tensor<16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<128x256x4x1xf32> -// CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<512x256xf32> -> tensor<128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<512x256xf32> -> tensor<128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> // CHECK: %[[MMT4D:.+]] = linalg.mmt4d // CHECK-SAME: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK-SAME: outs(%[[PACK_RES]] : // CHECK: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<128x512xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> // ----- @@ -2630,16 +2630,16 @@ func.func @batch_matmul_transpose_a_f32f32f32(%arg0: tensor<2x256x128xf32>, %arg // CHECK-LABEL: func.func @batch_matmul_transpose_a_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<2x256x128xf32>, %[[RHS:.+]]: tensor<2x256x512xf32>, %[[RESULT:.+]]: tensor<2x128x512xf32>) -> tensor<2x128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<2x16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x256x128xf32> -> tensor<2x16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x256x128xf32> -> tensor<2x16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<2x128x256x4x1xf32> -// 
CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x256x512xf32> -> tensor<2x128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x256x512xf32> -> tensor<2x128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<2x16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> // CHECK: %[[BATCH_MMT4D:.+]] = linalg.batch_mmt4d // CHECK: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK: outs(%[[PACK_RES]] : // CHECK: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<2x128x512xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> // ----- @@ -2669,16 +2669,16 @@ func.func @batch_matmul_transpose_b_f32f32f32(%arg0: tensor<2x128x256xf32>, %arg // CHECK-LABEL: func.func @batch_matmul_transpose_b_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<2x128x256xf32>, %[[RHS:.+]]: tensor<2x512x256xf32>, %[[RESULT:.+]]: tensor<2x128x512xf32>) -> tensor<2x128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<2x16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x128x256xf32> -> tensor<2x16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x128x256xf32> -> tensor<2x16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<2x128x256x4x1xf32> -// CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x512x256xf32> -> tensor<2x128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x512x256xf32> -> tensor<2x128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<2x16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> // CHECK: %[[BATCH_MMT4D:.+]] = linalg.batch_mmt4d // CHECK-SAME: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK-SAME: outs(%[[PACK_RES]] : // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<2x128x512xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] 
inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> // ----- @@ -2717,12 +2717,12 @@ func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0: tensor<32x128xi16>, % // CHECK-LABEL: func.func @generic_batch_vecmat_transposed_i16u4i32( // CHECK-SAME: %[[LHS:.+]]: tensor<32x128xi16>, %[[RHS:.+]]: tensor<4096x32x128xi4>, %[[RESULT:.+]]: tensor<4096x32xi32>) -> tensor<4096x32xi32> // CHECK-DAG: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<32x16x8xi16> -// CHECK-DAG: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %[[PACK_LHS_DEST]] : tensor<32x128xi16> -> tensor<32x16x8xi16> +// CHECK-DAG: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %[[PACK_LHS_DEST]] : tensor<32x128xi16> -> tensor<32x16x8xi16> // CHECK-DAG: %[[EXPAND_LHS:.+]] = tensor.expand_shape %[[PACK_LHS]] {{.*}} output_shape [32, 1, 16, 1, 8] : tensor<32x16x8xi16> into tensor<32x1x16x1x8xi16> // CHECK-DAG: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<32x128x16x32x8xi4> -// CHECK-DAG: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [1, 0, 2] inner_dims_pos = [0, 2] inner_tiles = [32, 8] into %[[PACK_RHS_DEST]] : tensor<4096x32x128xi4> -> tensor<32x128x16x32x8xi4> +// CHECK-DAG: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [1, 0, 2] inner_dims_pos = [0, 2] inner_tiles = [32, 8] into %[[PACK_RHS_DEST]] : tensor<4096x32x128xi4> -> tensor<32x128x16x32x8xi4> // CHECK-DAG: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<32x128x32xi32> -// CHECK-DAG: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[PACK_RES_DEST]] : tensor<4096x32xi32> -> tensor<32x128x32xi32> +// CHECK-DAG: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[PACK_RES_DEST]] : tensor<4096x32xi32> -> tensor<32x128x32xi32> // CHECK-DAG: %[[EXTEND_DEST:.+]] = tensor.empty() : tensor<32x128x16x32x8xi32> // CHECK: %[[EXTEND:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] @@ -2734,7 +2734,7 @@ func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0: tensor<32x128xi16>, % // CHECK-SAME: outs(%[[EXPAND_RES]] : // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[BATCH_MMT4D]] {{.*}} : tensor<32x1x128x1x32xi32> into tensor<32x128x32xi32> // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<4096x32xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSE]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[UNPACK_DEST]] : tensor<32x128x32xi32> -> tensor<4096x32xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSE]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[UNPACK_DEST]] : tensor<32x128x32xi32> -> tensor<4096x32xi32> // ----- @@ -2980,7 +2980,7 @@ func.func @set_encoding_LHS_with_layout() attributes { // CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(0) {{.*}} : !flow.dispatch.tensor> // CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(1) {{.*}} : !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] -// CHECK: %[[PACK:.+]] = tensor.pack 
%[[INPUT]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [1, 1] @@ -3012,7 +3012,7 @@ func.func @set_encoding_RHS_with_layout() attributes { // CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(1) {{.*}} : !flow.dispatch.tensor> // CHECK-DAG: %[[PAD_VALUE:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] // CHECK-SAME: padding_value(%[[PAD_VALUE]] : f32) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] @@ -3044,7 +3044,7 @@ func.func @unset_encoding_RES_with_layout() attributes { // CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(0) {{.*}} : !flow.dispatch.tensor> // CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(1) {{.*}} : !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[INPUT]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[INPUT]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [1, 16] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir index 03d1e51f9398..6ed536d29c15 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir @@ -1918,7 +1918,7 @@ hal.executable private @pack_lowering { %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [100, 250], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<100x250xf32> %3 = tensor.empty() : tensor<14x64x8x4xf32> - %4 = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %3 + %4 = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<100x250xf32> -> tensor<14x64x8x4xf32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0, 0], sizes = [14, 64, 8, 4], strides = [1, 1, 1, 1] @@ -1960,7 +1960,7 @@ hal.executable private @pack_lowering { %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<250x500xf32> %3 = tensor.empty() : tensor<64x64x8x4xf32> - %4 = tensor.pack %2 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 4] into %3 + %4 = linalg.pack %2 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 4] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<250x500xf32> -> tensor<64x64x8x4xf32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0, 0], sizes = [64, 64, 8, 4], strides = [1, 1, 1, 1] @@ -2018,7 +2018,7 @@ hal.executable private @clone_index_computations { %15 = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%13] %16 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%14] %17 = tensor.empty(%15, %16) : tensor - %18 = tensor.pack %12 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %17 + %18 = linalg.pack %12 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %17 
{lowering_config = #iree_codegen.lowering_config} : tensor -> tensor %19 = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%6] @@ -2075,7 +2075,7 @@ hal.executable private @dynamic_unpack { %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 + %12 = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor flow.dispatch.tensor.store %12, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} @@ -2087,7 +2087,7 @@ hal.executable private @dynamic_unpack { // CHECK-LABEL: func.func @dynamic_unpack // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -2128,7 +2128,7 @@ hal.executable private @dynamic_unpack_dynamic_tile { %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, %c32, %c16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5, %c32, %c16} -> tensor %11 = tensor.empty(%6, %7) : tensor - %12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%tile0, %tile1] into %11 + %12 = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%tile0, %tile1] into %11 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor flow.dispatch.tensor.store %12, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} @@ -2140,7 +2140,7 @@ hal.executable private @dynamic_unpack_dynamic_tile { // CHECK-LABEL: func.func @dynamic_unpack_dynamic_tile // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -2162,7 +2162,7 @@ hal.executable private @unpack_elem { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [16, 48, 8, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x48x8x8xf32> %3 = tensor.empty() : tensor<128x384xf32> - %4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<16x48x8x8xf32> -> tensor<128x384xf32> + %4 = linalg.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<16x48x8x8xf32> -> tensor<128x384xf32> %5 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%4 : tensor<128x384xf32>) outs(%3 : tensor<128x384xf32>) { ^bb0(%in: f32, %out: f32): %6 = arith.addf %in, %in : f32 @@ -2178,7 +2178,7 @@ hal.executable private @unpack_elem { // CHECK-LABEL: func.func @unpack_elem // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.generic // ----- @@ -2221,7 +2221,7 @@ hal.executable private @dynamic_unpack_fusion { %12 = flow.dispatch.tensor.load %5, offsets = [0], sizes = [16], strides = [1] : !flow.dispatch.tensor> -> tensor<16xi32> %13 = tensor.empty() : 
tensor<12544x16xi32> %14 = tensor.empty() : tensor<12544x16xi32> - %16 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %14 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor<12544x16xi32> + %16 = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %14 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor<12544x16xi32> %17 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst, %16, %11, %12 : tensor<16xi32>, tensor<12544x16xi32>, tensor<12544xi32>, tensor<16xi32>) outs(%13 : tensor<12544x16xi32>) { ^bb0(%in: i32, %in_0: i32, %in_1: i32, %in_2: i32, %out: i32): %18 = arith.muli %in_1, %c-30_i32 : i32 @@ -2240,7 +2240,7 @@ hal.executable private @dynamic_unpack_fusion { } // CHECK-LABEL: func.func @dynamic_unpack_fusion // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: tensor.extract_slice // CHECK: linalg.generic @@ -2300,7 +2300,7 @@ hal.executable private @elem_pack { linalg.yield %23, %25 : f32, f32 } -> (tensor<384x512xf32>, tensor<384x512xf32>) %17 = tensor.empty() : tensor<48x512x8x1xf32> - %18 = tensor.pack %16#0 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %17 {lowering_config = #iree_codegen.lowering_config} : tensor<384x512xf32> -> tensor<48x512x8x1xf32> + %18 = linalg.pack %16#0 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %17 {lowering_config = #iree_codegen.lowering_config} : tensor<384x512xf32> -> tensor<48x512x8x1xf32> flow.dispatch.tensor.store %18, %6, offsets = [0, 0, 0, 0], sizes = [48, 512, 8, 1], strides = [1, 1, 1, 1] : tensor<48x512x8x1xf32> -> !flow.dispatch.tensor> flow.dispatch.tensor.store %16#0, %7, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : tensor<384x512xf32> -> !flow.dispatch.tensor> return @@ -2312,7 +2312,7 @@ hal.executable private @elem_pack { // CHECK: scf.for // CHECK: scf.for // CHECK: %[[ELEM:.+]]:2 = linalg.generic -// CHECK: %[[PACK:.+]] = tensor.pack +// CHECK: %[[PACK:.+]] = linalg.pack // CHECK-DAG: flow.dispatch.tensor.store %[[PACK]], {{.*}} sizes = [8, 64, 8, 1] // CHECK-DAG: flow.dispatch.tensor.store %[[ELEM]]#0, {{.*}} sizes = [64, 64] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir index 2f3b91ff7255..0602dc6ae32a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir @@ -178,7 +178,7 @@ func.func @set_encoding_dynamic() attributes { // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_D0]], %[[TILED_D1]]} // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK: %[[EMPTY:.+]] = tensor.empty -// CHECK: %[[PACK:.+]] = tensor.pack +// CHECK: %[[PACK:.+]] = linalg.pack // CHECK-SAME: %[[INPUT]] padding_value(%[[CST]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[EMPTY]] // CHECK: flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]] @@ -229,7 +229,7 @@ func.func @unset_encoding_dynamic() attributes { // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_D0]], %[[TILED_D1]], 8, 4], strides = [1, 1, 1, 1] // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[D0]], %[[D1]]) -// 
CHECK: %[[UNPACK:.+]] = tensor.unpack %[[INPUT]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[INPUT]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[EMPTY]] // CHECK-DAG: flow.dispatch.tensor.store %[[UNPACK]], %[[OUTPUT_BINDING]] diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h index 13a393d33914..55945f2329ca 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h +++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h @@ -84,7 +84,7 @@ struct TileSwizzle { /// Container of information needed to materialize the layout transformations. struct MaterializeEncodingInfo { - // The next 3 fields are used to create a `tensor.pack` or `tensor.unpack` op, + // The next 3 fields are used to create a `linalg.pack` or `linalg.unpack` op, // changing the overall layout between row-major and tiled (where each tile is // row-major). SmallVector innerDimsPos; diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp index 3dc8e36d3b4b..6fbc9c45ff8a 100644 --- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp +++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp @@ -364,7 +364,7 @@ template static FailureOr> getSourceAndDestFromPackUnPackOp(RewriterBase &rewriter, OpTy op, const BufferizationOptions &options) { - static_assert(llvm::is_one_of::value); + static_assert(llvm::is_one_of::value); Value source; auto maybeBuffer = getBuffer(rewriter, op.getSource(), options); if (failed(maybeBuffer)) @@ -385,7 +385,7 @@ getSourceAndDestFromPackUnPackOp(RewriterBase &rewriter, OpTy op, return std::make_pair(source, dest); } -static LogicalResult bufferizePackOp(RewriterBase &rewriter, tensor::PackOp op, +static LogicalResult bufferizePackOp(RewriterBase &rewriter, linalg::PackOp op, const BufferizationOptions &options) { // Take a guard before anything else. OpBuilder::InsertionGuard g(rewriter); @@ -410,7 +410,7 @@ static LogicalResult bufferizePackOp(RewriterBase &rewriter, tensor::PackOp op, } static LogicalResult bufferizeUnPackOp(RewriterBase &rewriter, - tensor::UnPackOp op, + linalg::UnPackOp op, const BufferizationOptions &options) { // Take a guard before anything else. 
OpBuilder::InsertionGuard g(rewriter); @@ -489,9 +489,9 @@ struct PackUnPackOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const BufferizationOptions &options) const { return TypeSwitch(op) - .template Case( + .template Case( [&](auto pack) { return bufferizePackOp(rewriter, pack, options); }) - .template Case([&](auto unpack) { + .template Case([&](auto unpack) { return bufferizeUnPackOp(rewriter, unpack, options); }) .Default([](auto) { return failure(); }); @@ -643,10 +643,11 @@ void registerBufferizationInterfaces(DialectRegistry ®istry) { IREE::LinalgExt::AttentionOp::attachInterface< LinalgExtOpInterface>(*ctx); }); - registry.addExtension(+[](MLIRContext *ctx, tensor::TensorDialect *dialect) { - tensor::PackOp::attachInterface>( + registry.insert(); + registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { + linalg::PackOp::attachInterface>( *ctx); - tensor::UnPackOp::attachInterface>( + linalg::UnPackOp::attachInterface>( *ctx); }); } diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp index f669b5e0e17b..6264b843bc22 100644 --- a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp +++ b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp @@ -227,6 +227,10 @@ void registerPartitionableLoopsInterfaceModels(DialectRegistry ®istry) { #define GET_OP_LIST registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { + linalg::PackOp::attachInterface< + OuterParallelAsPartitionableLoops>(*ctx); + linalg::UnPackOp::attachInterface< + OuterParallelAsPartitionableLoops>(*ctx); registerInterfaceForLinalgOps< #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >(ctx); @@ -263,12 +267,8 @@ void registerPartitionableLoopsInterfaceModels(DialectRegistry ®istry) { *ctx); }); registry.addExtension(+[](MLIRContext *ctx, tensor::TensorDialect *dialect) { - tensor::PackOp::attachInterface< - OuterParallelAsPartitionableLoops>(*ctx); tensor::PadOp::attachInterface< OuterParallelAsPartitionableLoops>(*ctx); - tensor::UnPackOp::attachInterface< - OuterParallelAsPartitionableLoops>(*ctx); }); registry.addExtension( +[](MLIRContext *ctx, IREE::GPU::IREEGPUDialect *dialect) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp index 0c52f1ae2b06..0dbc504d7de3 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp @@ -1720,7 +1720,7 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, DispatchLoweringPassPipeline::Mmt4dTilingExpert); } -static bool isPackMatmulLHS(tensor::PackOp op) { +static bool isPackMatmulLHS(linalg::PackOp op) { // linalg.batch_matmul LHS shape if (op.getSourceRank() == 3 && op.getInnerDimsPos().size() == 2 && op.getInnerDimsPos()[0] == 1 && op.getInnerDimsPos()[1] == 2) { @@ -1735,7 +1735,7 @@ static bool isPackMatmulLHS(tensor::PackOp op) { /// configurations and target CPU features. 
static SmallVector getPackVectorTileSizes(mlir::FunctionOpInterface entryPointFn, - tensor::PackOp op) { + linalg::PackOp op) { SmallVector tileSizes(op.getSourceRank(), 1); auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn); int64_t vectorSize = getVectorSize(entryPointFn, op.getSourceType()); @@ -1755,7 +1755,7 @@ getPackVectorTileSizes(mlir::FunctionOpInterface entryPointFn, } static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, - tensor::PackOp op) { + linalg::PackOp op) { assert(!getLoweringConfig(op) && "expected lowering_config is not set"); int srcRank = op.getSourceRank(); @@ -1803,7 +1803,7 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, } static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, - tensor::UnPackOp op) { + linalg::UnPackOp op) { DistributionHeuristicConfig distConfig; distConfig.maxTileSizes.resize(op.getDestRank(), clDefaultDistTileSize); SmallVector distTileSizes = @@ -2629,7 +2629,7 @@ setRootConfigImpl(mlir::FunctionOpInterface entryPointFn, Operation *op, initCPULaunchConfig); }) .Case( [&](auto op) { return setRootConfig(entryPointFn, op); }) .Case &distTileSizes, SmallVector ¶llelVecTileSizes) { @@ -2716,9 +2716,9 @@ adjustTileSizesForPackOp(mlir::FunctionOpInterface entryPointFn, } /// Adjusts the tile sizes (carried by `rootOp`) to be aligned with -/// tensor.unpack inner tile sizes, if there are tensor.unpack producers. If the +/// linalg.unpack inner tile sizes, if there are linalg.unpack producers. If the /// tile sizes are not aligned, a stack buffer is needed because of -/// tensor.unpack tiling implementations. +/// linalg.unpack tiling implementations. static LogicalResult adjustTileSizesForUnPackOp(mlir::FunctionOpInterface entryPointFn, Operation *rootOp) { @@ -2733,7 +2733,7 @@ adjustTileSizesForUnPackOp(mlir::FunctionOpInterface entryPointFn, bool foundUnPackOp = false; SmallVector alignedSizes(linalgOp.getNumLoops(), 1); for (OpOperand *opOperand : linalgOp.getDpsInputOperands()) { - auto unpackOp = opOperand->get().getDefiningOp(); + auto unpackOp = opOperand->get().getDefiningOp(); if (!unpackOp) continue; @@ -2874,7 +2874,7 @@ adjustTileSizesForGenericOp(mlir::FunctionOpInterface entryPointFn, /// ^bb0(%in: f32, %in_2: f32, %out: f32): /// ... /// } -> tensor<384x1024xf32> -/// %pack = tensor.pack %13 +/// %pack = linalg.pack %13 /// inner_dims_pos = [0, 1] /// inner_tiles = [16, 1] /// into %14 : tensor<384x1024xf32> -> tensor<24x1024x16x1xf32> @@ -2944,7 +2944,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, // Given there are 3 generic ops in the dispatch: // %rootOp = linalg.generic {iterator_types = ["reduction", "parallel"]} ... 
// %2 = linalg.generic {iterator_types = ["parallel", "parallel"]} - // %3 = tensor.pack %2 + // %3 = linalg.pack %2 // Assume the distribution and parallel vector tile sizes from %rootOp is: // [[X1, 0], [X2, 0]] // Then the generic op %2 set the missing parallel vector tile sizes on its @@ -2970,7 +2970,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, if (op == rootOperation) continue; - if (auto packOp = dyn_cast(op)) { + if (auto packOp = dyn_cast(op)) { if (failed(adjustTileSizesForPackOp(entryPointFn, packOp, distTileSizes, parallelVecTileSizes))) { return failure(); @@ -3065,7 +3065,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, scalableTileFlagsList.push_back(commonVecScalableTileFlags); bool setUpOK = TypeSwitch(op) - .Case([&](auto packOp) { + .Case([&](auto packOp) { for (auto flags : rootLoweringConfig.getScalableTileFlagVals()) { // TODO: Handle scalable flags @@ -3119,7 +3119,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, }); // TODO: (awarzynski) This is effectively tracking the case of - // tensor.pack + scalable flags, which is not support ATM (see TODO + // linalg.pack + scalable flags, which is not support ATM (see TODO // above). Remove once that's implemented. if (!setUpOK) return failure(); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp index 6f15257417ae..6fe11c03ae4f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp @@ -36,7 +36,7 @@ void collectLoopsToPeel(Operation *op, .Case([](auto linalgOp) { return linalgOp.getNumLoops(); }) - .Case([](auto packOp) { + .Case([](auto packOp) { return packOp.getSourceRank(); }) .Default([](auto) { return 0; }); @@ -66,7 +66,7 @@ void LLVMCPUPeelPass::runOnOperation() { llvm::SmallSetVector uniqueLoopsToPeel; funcOp.walk([&](Operation *op) { - if (isa(op)) { + if (isa(op)) { LLVM_DEBUG(llvm::dbgs() << "Gather loops to peel from candidate op:\n" << *op << "\n"); collectLoopsToPeel(op, uniqueLoopsToPeel); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir index a44a37000a17..3ec520150259 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir @@ -96,7 +96,7 @@ module { %3 = affine.apply #map1(%arg2) %extracted_slice = tensor.extract_slice %arg0[%3, %arg4] [16, %2] [1, 1] : tensor to tensor<16x?xf32> %extracted_slice_1 = tensor.extract_slice %arg5[%arg2, %arg4, 0, 0] [1, %2, 16, 1] [1, 1, 1, 1] : tensor to tensor<1x?x16x1xf32> - %pack = tensor.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %extracted_slice_1 {lowering_config = #config} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> + %pack = linalg.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %extracted_slice_1 {lowering_config = #config} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> %inserted_slice = tensor.insert_slice %pack into %arg5[%arg2, %arg4, 0, 0] [1, %2, 16, 1] [1, 1, 1, 1] : tensor<1x?x16x1xf32> into tensor scf.yield %inserted_slice : tensor } @@ -108,6 +108,6 @@ module { // CHECK-LABEL: func.func @peel_pack // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.pack {{.*}} : tensor<16x16xf32> -> tensor<1x16x16x1xf32> +// CHECK: linalg.pack {{.*}} : tensor<16x16xf32> -> tensor<1x16x16x1xf32> // CHECK: scf.for -// 
CHECK: tensor.pack {{.*}} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> +// CHECK: linalg.pack {{.*}} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir index f8827d14338c..823928b08bec 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir @@ -27,7 +27,7 @@ module { %10 = arith.maximumf %9, %cst_0 : f32 linalg.yield %10 : f32 } -> tensor<384x512xf32> - %pack = tensor.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %2, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } @@ -64,7 +64,7 @@ module { %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<24x32x16x16xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %5 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %5 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %5 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> %6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%4, %unpack : tensor<512xf32>, tensor<384x512xf32>) outs(%5 : tensor<384x512xf32>) { ^bb0(%in: f32, %in_1: f32, %out: f32): %7 = arith.addf %in, %in_1 : f32 @@ -101,7 +101,7 @@ module { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [383, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<383x512xf32> %3 = tensor.empty() : tensor<24x512x16x1xf32> - %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<383x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<383x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir index 6936adef7e01..e8338e89e770 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir @@ -407,7 +407,7 @@ func.func @fuse_inputs_reduction() attributes {hal.executable.target = #executab %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [64, 1, 1, 16, 16], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x1x1x16x16xf32> %3 = tensor.empty() : tensor<64x16x16xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<64x16x16xf32>) -> tensor<64x16x16xf32> - %unpack = tensor.unpack %2 outer_dims_perm = 
[0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %3 : tensor<64x1x1x16x16xf32> -> tensor<64x16x16xf32> + %unpack = linalg.unpack %2 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %3 : tensor<64x1x1x16x16xf32> -> tensor<64x16x16xf32> %5 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%unpack : tensor<64x16x16xf32>) outs(%4 : tensor<64x16x16xf32>) { ^bb0(%in: f32, %out: f32): %6 = arith.addf %out, %in : f32 diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir index cba1af4fec2c..faeb04a02d9a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir @@ -255,7 +255,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_system_ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf32> %3 = tensor.empty() : tensor<4x48x8x1xf32> - %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<20x40xf32> -> tensor<4x48x8x1xf32> + %pack = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<20x40xf32> -> tensor<4x48x8x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 48, 8, 1], strides = [1, 1, 1, 1] : tensor<4x48x8x1xf32> -> !flow.dispatch.tensor> return } @@ -263,7 +263,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_system_ // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -288,7 +288,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %unpack = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor + %unpack = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor flow.dispatch.tensor.store %unpack, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} return } @@ -296,7 +296,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_outer_dynamic() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -322,7 +322,7 @@ func.func @unpack_fully_dynamic() attributes {hal.executable.target = #executabl %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) 
alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%8, %9} %14 = flow.dispatch.tensor.load %12, offsets = [0, 0, 0, 0], sizes = [%6, %7, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%6, %7, %10, %11} -> tensor %15 = tensor.empty(%8, %9) : tensor - %unpack = tensor.unpack %14 inner_dims_pos = [0, 1] inner_tiles = [%10, %11] into %15 : tensor -> tensor + %unpack = linalg.unpack %14 inner_dims_pos = [0, 1] inner_tiles = [%10, %11] into %15 : tensor -> tensor flow.dispatch.tensor.store %unpack, %13, offsets = [0, 0], sizes = [%8, %9], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%8, %9} return } @@ -330,7 +330,7 @@ func.func @unpack_fully_dynamic() attributes {hal.executable.target = #executabl // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_fully_dynamic() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir index fff8747cf504..3c6301d3dcaf 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir @@ -1064,7 +1064,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_embedde %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf32> %3 = tensor.empty() : tensor<2x48x16x1xf32> - %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf32> -> tensor<2x48x16x1xf32> + %pack = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf32> -> tensor<2x48x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [2, 48, 16, 1], strides = [1, 1, 1, 1] : tensor<2x48x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1073,7 +1073,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_embedde // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1090,7 +1090,7 @@ func.func @pack_f16() attributes {hal.executable.target = #executable_target_emb %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf16> %3 = tensor.empty() : tensor<2x48x16x1xf16> - %pack = tensor.pack %2 padding_value(%cst : f16) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf16> -> tensor<2x48x16x1xf16> + %pack = linalg.pack %2 padding_value(%cst : f16) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf16> -> tensor<2x48x16x1xf16> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [2, 48, 16, 1], strides = [1, 1, 1, 1] : tensor<2x48x16x1xf16> -> !flow.dispatch.tensor> return } @@ -1099,7 +1099,7 @@ func.func @pack_f16() attributes 
{hal.executable.target = #executable_target_emb // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack_f16() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1115,7 +1115,7 @@ func.func @pack_many_elements() attributes {hal.executable.target = #executable_ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1200, 500000], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1200x500000xf32> %3 = tensor.empty() : tensor<31250x1200x16x1xf32> - %pack = tensor.pack %2 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %3 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> + %pack = linalg.pack %2 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %3 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [31250, 1200, 16, 1], strides = [1, 1, 1, 1] : tensor<31250x1200x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1124,7 +1124,7 @@ func.func @pack_many_elements() attributes {hal.executable.target = #executable_ // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack_many_elements() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1148,7 +1148,7 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %5 = tensor.empty() : tensor<24x512x16x1xf32> %6 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %6 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %6 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> %7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%4, %unpack : tensor<512xf32>, tensor<384x512xf32>) outs(%6 : tensor<384x512xf32>) { ^bb0(%in: f32, %in_1: f32, %out: f32): %8 = arith.addf %in, %in_1 : f32 @@ -1156,7 +1156,7 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i %10 = arith.maximumf %9, %cst_0 : f32 linalg.yield %10 : f32 } -> tensor<384x512xf32> - %pack = tensor.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %2, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1166,11 +1166,11 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_generic_pack( // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG2]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG2]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = 
#[[CONFIG1]] // ----- @@ -1193,7 +1193,7 @@ func.func @elem_pack() attributes {hal.executable.target = #executable_target_em linalg.yield %6 : f32 } -> tensor<128x384xf32> %5 = tensor.empty() : tensor<16x384x8x1xf32> - %pack = tensor.pack %4 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %5 : tensor<128x384xf32> -> tensor<16x384x8x1xf32> + %pack = linalg.pack %4 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %5 : tensor<128x384xf32> -> tensor<16x384x8x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [16, 384, 8, 1], strides = [1, 1, 1, 1] : tensor<16x384x8x1xf32> -> !flow.dispatch.tensor> return } @@ -1205,7 +1205,7 @@ func.func @elem_pack() attributes {hal.executable.target = #executable_target_em // CHECK-SAME: translation_info = #[[TRANSLATION]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG1]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG2]] // ----- @@ -1230,7 +1230,7 @@ func.func @transpose_pack() attributes {hal.executable.target = #executable_targ linalg.yield %in : f32 } -> tensor<768x30522xf32> %5 = tensor.empty() : tensor<1908x768x16x1xf32> - %pack = tensor.pack %4 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %5 : tensor<768x30522xf32> -> tensor<1908x768x16x1xf32> + %pack = linalg.pack %4 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %5 : tensor<768x30522xf32> -> tensor<1908x768x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [1908, 768, 16, 1], strides = [1, 1, 1, 1] : tensor<1908x768x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1242,7 +1242,7 @@ func.func @transpose_pack() attributes {hal.executable.target = #executable_targ // CHECK-SAME: translation_info = #[[TRANSLATION]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG1]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG2]] // ----- @@ -1295,7 +1295,7 @@ func.func @reduction_broadcast_pack() attributes {hal.executable.target = #execu %22 = arith.addf %21, %20 : f32 linalg.yield %22 : f32 } -> tensor<384x1024xf32> - %pack = tensor.pack %14 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %9 : tensor<384x1024xf32> -> tensor<24x1024x16x1xf32> + %pack = linalg.pack %14 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %9 : tensor<384x1024xf32> -> tensor<24x1024x16x1xf32> flow.dispatch.tensor.store %pack, %4, offsets = [0, 0, 0, 0], sizes = [24, 1024, 16, 1], strides = [1, 1, 1, 1] : tensor<24x1024x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1313,7 +1313,7 @@ func.func @reduction_broadcast_pack() attributes {hal.executable.target = #execu // CHECK-SAME: lowering_config = #[[CONFIG2]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG3]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG4]] // ----- @@ -1345,7 +1345,7 @@ func.func @reduction_pack() attributes {hal.executable.target = #executable_targ %12 = arith.addf %out, %11 : f32 linalg.yield %12 : f32 } -> tensor<384x1024xf32> - %pack = tensor.pack %9 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x1024xf32> -> tensor<1024x24x16x1xf32> + %pack = linalg.pack %9 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x1024xf32> -> tensor<1024x24x16x1xf32> flow.dispatch.tensor.store %pack, %2, offsets = [0, 0, 0, 0], sizes = [1024, 24, 16, 
1], strides = [1, 1, 1, 1] : tensor<1024x24x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1360,7 +1360,7 @@ func.func @reduction_pack() attributes {hal.executable.target = #executable_targ // CHECK-SAME: lowering_config = #[[CONFIG1]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG2]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG3]] // ----- @@ -1377,7 +1377,7 @@ func.func @unpack_static() attributes {hal.executable.target = #executable_targe %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [64, 256, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x256x16x16xf32> %3 = tensor.empty() : tensor<1024x4096xf32> - %unpack = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<64x256x16x16xf32> -> tensor<1024x4096xf32> + %unpack = linalg.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<64x256x16x16xf32> -> tensor<1024x4096xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [1024, 4096], strides = [1, 1] : tensor<1024x4096xf32> -> !flow.dispatch.tensor> return } @@ -1386,7 +1386,7 @@ func.func @unpack_static() attributes {hal.executable.target = #executable_targe // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_static() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1409,7 +1409,7 @@ func.func @unpack_elem() attributes {hal.executable.target = #executable_target_ %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor> -> tensor<128xf32> %5 = tensor.empty() : tensor<128x384xf32> %6 = tensor.empty() : tensor<384x128xf32> - %unpack = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %6 : tensor<48x64x8x2xf32> -> tensor<384x128xf32> + %unpack = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %6 : tensor<48x64x8x2xf32> -> tensor<384x128xf32> %7 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel"]} ins(%4, %unpack : tensor<128xf32>, tensor<384x128xf32>) outs(%5 : tensor<128x384xf32>) { ^bb0(%in: f32, %in_0: f32, %out: f32): %8 = arith.addf %in, %in_0 : f32 diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir index 0433b677f24b..f0ca16903d17 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir @@ -66,7 +66,7 @@ func.func @quantized_matmul(%arg0: tensor<2x4x128x16x1xi8>, %arg1: tensor<2x4x16 %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<2x4x688x16x16xf32>) -> tensor<2x4x688x16x16xf32> %6 = linalg.batch_mmt4d {lowering_config = #config2} ins(%1, %3 : tensor<2x4x128x16x1xf32>, tensor<2x688x128x16x1xf32>) outs(%5 : tensor<2x4x688x16x16xf32>) -> tensor<2x4x688x16x16xf32> %7 = tensor.empty() : tensor<2x11008x64xf32> - %unpack = tensor.unpack %6 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 16] into %7 : tensor<2x4x688x16x16xf32> -> tensor<2x11008x64xf32> + %unpack = linalg.unpack %6 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] 
inner_tiles = [16, 16] into %7 : tensor<2x4x688x16x16xf32> -> tensor<2x11008x64xf32> return %unpack : tensor<2x11008x64xf32> } // CHECK: func.func @quantized_matmul( @@ -76,7 +76,7 @@ func.func @quantized_matmul(%arg0: tensor<2x4x128x16x1xi8>, %arg1: tensor<2x4x16 // CHECK: linalg.generic // CHECK: linalg.fill // CHECK: linalg.batch_mmt4d -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp index a8ef7b779f56..c6e0a3c2d11a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp @@ -1637,7 +1637,7 @@ getDefaultWorkgroupTileSizesForPackUnPack(TilingInterface op, static LogicalResult setPackConfig(IREE::GPU::TargetAttr target, mlir::FunctionOpInterface entryPoint, - tensor::PackOp packOp) { + linalg::PackOp packOp) { SmallVector tileSizes = getDefaultWorkgroupTileSizesForPackUnPack( cast(packOp.getOperation()), target.getPreferredSubgroupSize()); @@ -2436,7 +2436,7 @@ static LogicalResult setRootConfig(IREE::GPU::TargetAttr target, LDBG("Winograd Config"); return setWinogradOpConfig(target, entryPointFn, winogradOp); }) - .Case([&](auto packOp) { + .Case([&](auto packOp) { LDBG("Pack Config"); return setPackConfig(target, entryPointFn, packOp); }) @@ -2534,15 +2534,15 @@ LogicalResult initGPULaunchConfig(FunctionOpInterface funcOp) { Operation *rootOperation = nullptr; - // Find the root operation. linalg.generic, linalg.fill, tensor.pack, - // tensor.unpack, and scatter are not root operations if there are other + // Find the root operation. linalg.generic, linalg.fill, linalg.pack, + // linalg.unpack, and scatter are not root operations if there are other // compute operations present. Also, construct a set of generic ops that // are to be skipped. These generic ops that are used to compute scatter // indices are not root operations. llvm::SmallDenseSet genericToSkip; for (Operation *op : llvm::reverse(computeOps)) { if (!isa(op)) { + linalg::PackOp, linalg::UnPackOp>(op)) { rootOperation = op; break; } @@ -2583,7 +2583,7 @@ LogicalResult initGPULaunchConfig(FunctionOpInterface funcOp) { // Pack and unpack ops take priority over scatter and fill ops as the root op. if (!rootOperation) { for (Operation *op : llvm::reverse(computeOps)) { - if (isa(op)) { + if (isa(op)) { rootOperation = op; break; } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp index bc9465ce4d31..ca2fbe1f057a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp @@ -49,8 +49,8 @@ getPaddedShapeFromTensorLoad(IREE::Flow::DispatchTensorLoadOp tensorLoad, } static FailureOr rewriteAsPaddedOp(IRRewriter &rewriter, - tensor::UnPackOp op, - tensor::UnPackOp &paddedOp) { + linalg::UnPackOp op, + linalg::UnPackOp &paddedOp) { Location loc = op.getLoc(); // Set IP after op because we also take the dims of the original output. @@ -68,7 +68,7 @@ static FailureOr rewriteAsPaddedOp(IRRewriter &rewriter, return failure(); auto paddedShape = *maybePaddedShape; - // Pad to the shape that makes tensor.unpack ops produce full tiles. + // Pad to the shape that makes linalg.unpack ops produce full tiles. 
SmallVector innerTiles = op.getStaticTiles(); ArrayRef dimPos = op.getInnerDimsPos(); for (auto [pos, size] : llvm::zip_equal(dimPos, innerTiles)) { @@ -85,7 +85,7 @@ static FailureOr rewriteAsPaddedOp(IRRewriter &rewriter, SmallVector paddedOperands = {op.getSource(), paddedValue}; paddedOperands.append(op.getInnerTiles().begin(), op.getInnerTiles().end()); - paddedOp = rewriter.create( + paddedOp = rewriter.create( loc, TypeRange{paddedValue.getType()}, paddedOperands, op->getAttrs()); // Slice out the original shape from the padded result to pass on to @@ -154,8 +154,8 @@ struct LLVMGPUTensorPadPass final rewriter.replaceOp(linalgOp, newResults); }); - funcOp.walk([&](tensor::UnPackOp unpackOp) { - tensor::UnPackOp paddedOp; + funcOp.walk([&](linalg::UnPackOp unpackOp) { + linalg::UnPackOp paddedOp; FailureOr newResult = rewriteAsPaddedOp(rewriter, unpackOp, paddedOp); if (failed(newResult)) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp index 4783036a7172..53c9cd3ed7be 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp @@ -333,12 +333,12 @@ static void addGPUBufferizePasses(OpPassManager &funcPassManager) { /// op is a PackOp with a DispatchTensorLoadOp producer, or an UnPackOp with /// only DispatchTensorStoreOp consumers. LogicalResult isAtBoundary(Operation *op) { - if (isa(op)) { + if (isa(op)) { if (isa_and_nonnull( op->getOperand(0).getDefiningOp())) { return success(); } - } else if (isa(op)) { + } else if (isa(op)) { if (llvm::all_of(op->getUsers(), [](Operation *user) { return isa(user); })) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir index 5901999d5adb..e78b11ba20de 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir @@ -525,7 +525,7 @@ func.func @scatter_as_root_op(%arg0: tensor<4x?xi64>, func.func @set_encoding_gpu(%0 : tensor<1234x567xi8>) -> tensor<10x9x8x4x4x4x2x8xi8> { %c0_i8 = arith.constant 0 : i8 %22 = tensor.empty() : tensor<10x9x128x64xi8> - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [128, 64] into %22 : tensor<1234x567xi8> -> tensor<10x9x128x64xi8> %expanded = tensor.expand_shape %pack [[0], [1], [2, 3, 4], [5, 6, 7]] @@ -565,7 +565,7 @@ func.func @unset_encoding_gpu(%arg0: tensor<10x5x4x8x2x4x16x4xi32>) -> tensor<12 %collapsed = tensor.collapse_shape %transposed [[0], [1], [2, 3, 4], [5, 6, 7]] : tensor<10x5x4x8x4x4x16x2xi32> into tensor<10x5x128x128xi32> %1 = tensor.empty() : tensor<1234x567xi32> - %unpack = tensor.unpack %collapsed + %unpack = linalg.unpack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [128, 128] into %1 : tensor<10x5x128x128xi32> -> tensor<1234x567xi32> return %unpack : tensor<1234x567xi32> @@ -591,7 +591,7 @@ func.func @pack_dynamic_producer(%arg0: tensor, %d0: index, %d1: index, linalg.yield %in : i8 } -> tensor %init1 = tensor.empty(%d2, %d3) : tensor - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %init1 : tensor -> tensor return %pack : tensor @@ -617,7 
+617,7 @@ func.func @pack_full_tile(%arg0: tensor<32x32xi8>) -> tensor<1x1x32x32xi8> { linalg.yield %in : i8 } -> tensor<32x32xi8> %init1 = tensor.empty() : tensor<1x1x32x32xi8> - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %init1 : tensor<32x32xi8> -> tensor<1x1x32x32xi8> return %pack : tensor<1x1x32x32xi8> @@ -643,7 +643,7 @@ func.func @pack_dynamic_tile(%arg0: tensor<32x32xi8>, %d0: index, %d1: index, %t linalg.yield %in : i8 } -> tensor<32x32xi8> %init1 = tensor.empty(%d0, %d1, %tile0, %tile1) : tensor - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile0, %tile1] into %init1 : tensor<32x32xi8> -> tensor return %pack : tensor diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir index 642c6ed1a179..efac8f5670ca 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir @@ -455,7 +455,7 @@ func.func @dynamic_pack_2x2() { %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %pack = tensor.pack %10 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %11 : tensor -> tensor + %pack = linalg.pack %10 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %11 : tensor -> tensor flow.dispatch.tensor.store %pack, %9, offsets = [0, 0, 0, 0], sizes = [%6, %7, 2, 2], strides = [1, 1, 1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} return } @@ -464,7 +464,7 @@ func.func @dynamic_pack_2x2() { // SM80-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // SM80: func.func @dynamic_pack_2x2() // SM80-SAME: translation_info = #[[TRANSLATION]] -// SM80: tensor.pack +// SM80: linalg.pack // SM80-SAME: lowering_config = #[[CONFIG]] // ----- diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir index bb7722c3086b..8cdd5223dbb8 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir @@ -10,7 +10,7 @@ func.func @static_pack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xi32> %3 = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %2 inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %3 : tensor<128x256xi32> -> tensor<4x16x16x32xi32> + %pack = linalg.pack %2 inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %3 : tensor<128x256xi32> -> tensor<4x16x16x32xi32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 16, 16, 32], strides = [1, 1, 1, 1] : tensor<4x16x16x32xi32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir 
index 48fc842231ed..b49f474debab 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir @@ -114,7 +114,7 @@ func.func @unpack_dynamic() { %c0_i32 = arith.constant 0 : i32 %22 = arith.subi %c16, %12 : index %23 = arith.subi %c16, %15 : index - %unpack = tensor.unpack %21 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %16 : tensor -> tensor + %unpack = linalg.unpack %21 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %16 : tensor -> tensor flow.dispatch.tensor.store %unpack, %9, offsets = [%arg0, %arg1], sizes = [%12, %15], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} } } @@ -124,6 +124,6 @@ func.func @unpack_dynamic() { // CHECK: %[[DEST_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[DEST_BUF]] // CHECK: %[[PAD:.+]] = tensor.pad %[[LOAD]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack {{.+}} into %[[PAD]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack {{.+}} into %[[PAD]] // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[UNPACK]] // CHECK: flow.dispatch.tensor.store %[[SLICE]], %[[DEST_BUF]] diff --git a/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp index eafb5cf83721..072f6afd0742 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp @@ -34,7 +34,7 @@ FailureOr getRootOperation(ArrayRef computeOps) { } if (isa(op) && - !isa(op)) { + !isa(op)) { // All other operations that implement this interface are root ops. rootOperation = op; break; @@ -54,7 +54,7 @@ FailureOr getRootOperation(ArrayRef computeOps) { if (!rootOperation) { // Check for pad/pack/unpack ops by themselves. for (auto op : llvm::reverse(computeOps)) { - if (isa(op)) { + if (isa(op)) { rootOperation = op; break; } diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp index 3f0c42b7a4de..f7287ed1d1d2 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp @@ -1605,7 +1605,7 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { std::optional vscaleRange; if (!opResult) { // Note: Inferring scalable sizes is not supported is `opResult` is set - // (which is used to compute sizes for tensor.pack/unpack). + // (which is used to compute sizes for linalg.pack/unpack). 
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(linalgOp); vscaleRange = getDefaultVscaleRange(targetAttr); } @@ -1672,7 +1672,7 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { return result; } -std::optional inferSizesFromIR(tensor::PackOp op) { +std::optional inferSizesFromIR(linalg::PackOp op) { LLVM_DEBUG(llvm::dbgs() << "Inferring dest sizes for:\n" << op << "\n"); if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { @@ -1715,7 +1715,7 @@ std::optional inferSizesFromIR(tensor::PackOp op) { return result; } -std::optional inferSizesFromIR(tensor::UnPackOp op) { +std::optional inferSizesFromIR(linalg::UnPackOp op) { LLVM_DEBUG(llvm::dbgs() << "Inferring dest sizes for:\n" << op << "\n"); if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { @@ -1766,7 +1766,7 @@ std::optional inferSizesFromIR(Value val) { TypeSwitch(val.getDefiningOp()) .Case( [&](auto op) { result = inferSizesFromIR(op, cast(val)); }) - .Case([&](auto op) { result = inferSizesFromIR(op); }) + .Case([&](auto op) { result = inferSizesFromIR(op); }) .Case([&](tensor::ExtractSliceOp op) { // tensor::ExtractSliceOp is not vectorizable, so only `destShape` has // the values. diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.h b/compiler/src/iree/compiler/Codegen/Utils/Utils.h index ea83c9f8de4d..18427690d1f9 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.h +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.h @@ -267,15 +267,15 @@ struct VectorizationTileSizes { /// chain. std::optional inferSizesFromIR(Value val); -/// Returns the result sizes and vector input sizes of the tensor.unpack op. The +/// Returns the result sizes and vector input sizes of the linalg.unpack op. The /// inferred bounding size is returned if it is dynamic shape. Returns /// std::nullopt if the shape inference failed. -std::optional inferSizesFromIR(tensor::UnPackOp op); +std::optional inferSizesFromIR(linalg::UnPackOp op); -/// Returns the result sizes and vector input sizes of the tensor.pack op. The +/// Returns the result sizes and vector input sizes of the linalg.pack op. The /// inferred bounding size is returned if it is dynamic shape. Returns /// std::nullopt if the shape inference failed. -std::optional inferSizesFromIR(tensor::PackOp op); +std::optional inferSizesFromIR(linalg::PackOp op); /// Tries to infer the vector sizes from an IR using ValueBounds analysis. If /// `opResult` is provided, it stores the bounded result shapes to destShape. 
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir index ecc662357740..c8f40e490cf4 100644 --- a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir @@ -169,7 +169,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %unpack = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor + %unpack = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor flow.dispatch.tensor.store %unpack, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} return } @@ -178,7 +178,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_outer_dynamic // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -210,7 +210,7 @@ func.func @elem_pack_ukernels() attributes {hal.executable.target = #executable_ %9 = affine.apply #map()[%8#0] %10 = affine.apply #map1()[%8#1] %11 = tensor.empty(%9, %10, %8#0, %8#1) : tensor - %pack = tensor.pack %7 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [%8#0, %8#1] into %11 : tensor<1024x2048xf32> -> tensor + %pack = linalg.pack %7 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [%8#0, %8#1] into %11 : tensor<1024x2048xf32> -> tensor %12:2 = iree_codegen.query_tile_sizes tensor<1024x2048xf32, #iree_encoding.encoding> -> index, index %13 = affine.apply #map()[%12#0] %14 = affine.apply #map1()[%12#1] diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp index ad93878b83d2..03028fb2220e 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp @@ -381,7 +381,7 @@ static std::string summarizeDispatchRegion(Region ®ion) { << "', cost: " << bestEstimatedCost << "\n"); }) .Case([&](auto op) { + linalg::PackOp, linalg::UnPackOp>([&](auto op) { // SetEncoding/UnsetEncoding/PackOp/UnPackOp is the bestOp only if // there are no other operations. int64_t estimatedCost = kMinEstimatedCost + 1; @@ -429,7 +429,7 @@ static std::string summarizeDispatchRegion(Region ®ion) { [&](auto op) { bestSummary = summarizeLinalgExtOp(op); }) .Case( [&](auto op) { bestSummary = summarizeLinalgOp(op); }) - .Case([&](auto op) { + .Case([&](auto op) { auto opName = getOpNameWithoutDialectName(op); bestSummary = opName + "_" + operandTypeToString(op.getSource()); }) @@ -462,12 +462,12 @@ static std::string summarizeDispatchRegion(Region ®ion) { // Add heuristic hint to dispatch name if the unpack op is the first op and // the pack op is the last op. 
if (!tileableOps.empty()) { - if (!isa(bestOp) && - isa(tileableOps.front())) { + if (!isa(bestOp) && + isa(tileableOps.front())) { bestSummary = "unpack_" + bestSummary; } - if (!isa(bestOp) && - isa(tileableOps.back())) { + if (!isa(bestOp) && + isa(tileableOps.back())) { bestSummary = bestSummary + "_pack"; } } diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp index 0e340c4e69b5..294d44325edd 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp @@ -35,7 +35,7 @@ static FailureOr getZero(OpBuilder &builder, Location loc, static bool shouldBeConvertedToFlowTensorOp(tensor::EmptyOp emptyTensorOp) { return !(llvm::all_of(emptyTensorOp->getUsers(), llvm::IsaPred) || + linalg::PackOp, linalg::UnPackOp>) || emptyTensorOp->getParentOfType()); } diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir index c795817b0e75..b4f4becc6323 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir @@ -184,7 +184,7 @@ flow.executable private @ex { func.func @ex(%arg0: !flow.dispatch.tensor>, %arg1: !flow.dispatch.tensor>) { %0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %1 = tensor.empty() : tensor<24x512x16x1xf32> - %pack = tensor.pack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %1 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %1 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %arg1, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } @@ -200,7 +200,7 @@ flow.executable private @ex { func.func @ex(%arg0: !flow.dispatch.tensor>, %arg1: !flow.dispatch.tensor>) { %0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<24x32x16x16xf32> %1 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %1 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %1 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> flow.dispatch.tensor.store %unpack, %arg1, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : tensor<384x512xf32> -> !flow.dispatch.tensor> return } @@ -220,13 +220,13 @@ flow.executable private @ex { %1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %2 = tensor.empty() : tensor<24x512x16x1xf32> %3 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> %4 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%1, %unpack : tensor<512xf32>, 
tensor<384x512xf32>) outs(%3 : tensor<384x512xf32>) { ^bb0(%in: f32, %in_0: f32, %out: f32): %5 = arith.addf %in, %in_0 : f32 linalg.yield %5 : f32 } -> tensor<384x512xf32> - %pack = tensor.pack %4 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %2 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %4 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %2 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %arg2, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp index b2de618764fd..8ac73b8accc2 100644 --- a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp @@ -110,11 +110,11 @@ static void removeFusionGroupsAttribute(Operation *op) { //===----------------------------------------------------------------------===// /// Returns true if the reduced dimensions in the linalgOp of the unpack result -/// are not unpacked by the producer tensor::UnPackOp. This means the reduced +/// are not unpacked by the producer linalg::UnPackOp. This means the reduced /// dimensions of the unpack result are not part of the inner_dims_pos. static bool hasNoPackedReductionDimensions(linalg::LinalgOp linalgOp, Operation *producer) { - auto unpack = dyn_cast(producer); + auto unpack = dyn_cast(producer); if (!unpack) { return false; } @@ -148,7 +148,7 @@ static bool hasNoPackedReductionDimensions(linalg::LinalgOp linalgOp, /// Returns true if the linalgOp is fusable with an unpack producer static bool hasFusableUnpackProducer(linalg::LinalgOp linalgOp) { return llvm::any_of(linalgOp->getOperands(), [&](Value operand) { - auto producer = operand.getDefiningOp(); + auto producer = operand.getDefiningOp(); return producer && hasNoPackedReductionDimensions(linalgOp, producer); }); } @@ -173,27 +173,27 @@ static bool isRootOp(Operation *op) { return !isa(op); } if (isa(op)) { - return !isa(op); + return !isa(op); } - return isa(op); + return isa(op); } /// Returns true if the operation is a `pack` op or a `set_encoding` op that /// has pack semantics. // TODO(ravishankarm): This seems like a use case for an interface. static bool isPackLikeOp(Operation *op) { - return isa(op); + return isa(op); } /// Returns true if the operation is an `unpack` op or an `unset_encoding` op. static bool isUnpackLikeOp(Operation *op) { - return isa(op); + return isa(op); } /// Since `iree_encoding.set_encoding` doesnt have padding semantics a /// `tensor.pad` is introduced to get the shapes of the input and output to /// match. The `tensor.pad` -> `set_encoding` can be folded later on into a -/// single `tensor.pack` operation. But it means the fusion has to try to keep +/// single `linalg.pack` operation. But it means the fusion has to try to keep /// these in the same dispatch. // TODO(ravishankarm): Maybe make `set_encoding` have pad semantics that can be // explicitly broken down if needed. @@ -648,7 +648,7 @@ isFusableWithProducer(OpOperand &operand, return TypeSwitch(producer) .Case([&](auto padOp) { return true; }) .Case([&](auto linalgOp) { - if (auto packOp = dyn_cast(consumer)) { + if (auto packOp = dyn_cast(consumer)) { // TODO(#12746): fusion of pack with dynamic inner tile size // causes an error in backend. Disable for now. 
if (!packOp.getInnerTiles().empty()) { @@ -777,7 +777,7 @@ decideFusableLinalgOps(Region ®ion, DominanceInfo const &dominanceInfo, // to convert them to splats. Also avoid moving dequantization-like ops // into their own dispatch since it is better to clone these ops and avoid // materializing large tensors between dispatches. - if (!isa(op) || IREE::Flow::isClonableIntoDispatchOp(&op, clonableOptions)) { continue; diff --git a/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp b/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp index 16e5a32a0e1b..8173d9b07b56 100644 --- a/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp @@ -56,7 +56,7 @@ static bool isComputeOperation(Operation *op) { } if (op->getDialect() == context->getLoadedDialect()) { return !isa(op); + tensor::ExpandShapeOp, linalg::PackOp, linalg::UnPackOp>(op); } return false; } diff --git a/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp b/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp index 8ef1c4e53299..19ac81c3bb80 100644 --- a/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp @@ -160,7 +160,7 @@ struct FusionPreprocessingPass final // operand shapes. memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns); memref::populateResolveShapedTypeResultDimsPatterns(patterns); - tensor::populateFoldIntoPackAndUnpackPatterns(patterns); + linalg::populateFoldIntoPackAndUnpackPatterns(patterns); if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) { return signalPassFailure(); } diff --git a/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp b/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp index 416dc5ec17b7..1d67e5cfe531 100644 --- a/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp @@ -50,7 +50,7 @@ struct SinkReshapesPass final static bool isFusableUsingTileAndFuse(Operation *producer, Operation *consumer) { return llvm::isa_and_nonnull(producer); } diff --git a/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp b/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp index 28927ecc86a4..a9c3caec84e0 100644 --- a/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp @@ -65,7 +65,7 @@ struct TensorPadOpConversion : public OpRewritePattern { return failure(); } } - // (pad + set_encoding) gets folded in to tensor.pack in the + // (pad + set_encoding) gets folded in to linalg.pack in the // MaterializeEncoding pass. Rewriting those pads into insert_slice would // defeat that. 
if (isa(use)) { diff --git a/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir b/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir index 377c91b6a054..484d446b99ef 100644 --- a/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir +++ b/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir @@ -34,7 +34,7 @@ util.func public @do_not_collapse_cst_in_place(%arg0: tensor<1x1x2304xf32>) { util.func public @unpack_collapse(%arg0: tensor<2x320x128x128xf32>, %arg1: tensor<320xf32>, %arg2: tensor<320xf32>, %arg3: tensor<1x5x2x64xf32>) -> tensor<2x320x128x128xf16> { %dispatch = flow.dispatch.region -> (tensor<2x320x128x128xf16>) { %0 = tensor.empty() : tensor<2x320xf32> - %unpack = tensor.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %0 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> + %unpack = linalg.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %0 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> %1 = tensor.empty() : tensor<2x320x128x128xf16> %2 = linalg.generic { indexing_maps = [#map, #map1, #map2, #map1, #map], @@ -83,7 +83,7 @@ util.func public @unpack_elementwise_collapse(%arg0: tensor<2x320x128x128xf32>, linalg.yield %22 : f32 } -> tensor<2x320x128x128xf32> - %unpack = tensor.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> + %unpack = linalg.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> %3 = linalg.generic {indexing_maps = [#map, #map1, #map2, #map1, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%elementwise, %arg1, %unpack, %arg2 : tensor<2x320x128x128xf32>, tensor<320xf32>, tensor<2x320xf32>, tensor<320xf32>) outs(%2 : tensor<2x320x128x128xf16>) { ^bb0(%in: f32, %in_0: f32, %in_1: f32, %in_2: f32, %out: f16): @@ -133,7 +133,7 @@ util.func public @prevent_collapse(%arg0: tensor<2x320x128x128xf32>, %arg1: tens } -> tensor<2x320x128x128xf32> %barrier = util.optimization_barrier %elementwise : tensor<2x320x128x128xf32> - %unpack = tensor.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> + %unpack = linalg.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> %3 = linalg.generic {indexing_maps = [#map, #map1, #map2, #map1, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%barrier, %arg1, %unpack, %arg2 : tensor<2x320x128x128xf32>, tensor<320xf32>, tensor<2x320xf32>, tensor<320xf32>) outs(%2 : tensor<2x320x128x128xf16>) { ^bb0(%in: f32, %in_0: f32, %in_1: f32, %in_2: f32, %out: f16): diff --git a/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir b/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir index 33ef6f9ed76f..a848e366d547 100644 --- a/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir +++ b/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir @@ -524,7 +524,7 @@ util.func public @fix_issue_16835(%arg0: tensor<49x6x16x16xf32>, %arg1: tensor<9 %cst_2 = arith.constant 0.166666672 : f32 %0 = tensor.empty() : tensor<784x96xf32> %1 = tensor.empty() : 
tensor<784x96xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<49x6x16x16xf32> -> tensor<784x96xf32> + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<49x6x16x16xf32> -> tensor<784x96xf32> %2 = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} ins(%unpack, %arg1 : tensor<784x96xf32>, tensor<96xf32>) outs(%1 : tensor<784x96xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %3 = arith.addf %in, %in_3 : f32 @@ -539,7 +539,7 @@ util.func public @fix_issue_16835(%arg0: tensor<49x6x16x16xf32>, %arg1: tensor<9 util.return %expanded : tensor<28x28x96xf32> } // CHECK-LABEL: util.func public @fix_issue_16835 -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.generic // CHECK: tensor.expand_shape diff --git a/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir b/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir index 4a1b808a9127..6f5abce51feb 100644 --- a/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir +++ b/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir @@ -24,7 +24,7 @@ util.func public @pack_elementwise_fusion(%arg0 : tensor, %8 = tensor.empty(%6, %7) : tensor // TODO(#12746) : The inner_tiles could be dynamic here. It is disabled // due to unrelated codegen issue. - %9 = tensor.pack %5 padding_value(%cst : f32) + %9 = linalg.pack %5 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %8 : tensor -> tensor util.return %9 : tensor @@ -39,7 +39,7 @@ util.func public @pack_elementwise_fusion(%arg0 : tensor, // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG1]], %[[ARG0]] : -// CHECK: %[[PACK:.+]] = tensor.pack %[[GENERIC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[GENERIC]] // CHECK: flow.return %[[PACK]] // CHECK: util.return %[[RETURN]] @@ -80,7 +80,7 @@ util.func public @pack_fusion(%arg0 : tensor, %8 = tensor.empty(%6, %7) : tensor // TODO(#12746) : The inner_tiles could be dynamic here. It is disabled // due to unrelated codegen issue. 
- %9 = tensor.pack %5 padding_value(%cst : f32) + %9 = linalg.pack %5 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %8 : tensor -> tensor util.return %9 : tensor @@ -95,7 +95,7 @@ util.func public @pack_fusion(%arg0 : tensor, // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG1]], %[[REDUCTION]] : -// CHECK: %[[PACK:.+]] = tensor.pack %[[GENERIC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[GENERIC]] // CHECK: flow.return %[[PACK]] // CHECK: util.return %[[RETURN]] @@ -119,7 +119,7 @@ util.func public @tranpose_pack_fusion(%arg0: tensor) -> tensor - %pack = tensor.pack %1 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %4 : tensor -> tensor + %pack = linalg.pack %1 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %4 : tensor -> tensor util.return %pack : tensor } // No fusion as the CPU backend currently can't handle fusion with transpose @@ -131,7 +131,7 @@ util.func public @tranpose_pack_fusion(%arg0: tensor) -> tensor (s0 * s1)>()[%d0, %d2] %folded_dim1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%d1, %d3] %dest = tensor.empty(%folded_dim0, %folded_dim1) : tensor - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [%d2, %d3] + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [%d2, %d3] into %dest : tensor -> tensor %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, @@ -370,7 +370,7 @@ util.func public @unpack_elementwise_fusion( // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-SAME: %[[ARG1:.+]]: tensor) // CHECK: %[[RESULT:.+]] = flow.dispatch.region -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[UNPACK]], %[[ARG1]] // CHECK: flow.return %[[GENERIC]] @@ -406,7 +406,7 @@ util.func public @unpack_non_intersecting_reduction( %folded_dim = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%d1, %d2] %dest0 = tensor.empty(%d0, %folded_dim) : tensor %dest1 = tensor.empty(%folded_dim) : tensor - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [%d2] + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [%d2] into %dest0 : tensor -> tensor %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, @@ -426,7 +426,7 @@ util.func public @unpack_non_intersecting_reduction( // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-SAME: %[[ARG1:.+]]: tensor) // CHECK: %[[RESULT:.+]] = flow.dispatch.region -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[UNPACK]], %[[ARG1]] // CHECK: flow.return %[[GENERIC]] @@ -1014,7 +1014,7 @@ util.func @scatter_index_producer_fusion(%arg0 : tensor, util.func @move_captured_from_above_ops(%arg0 : tensor<1x1x2x4xi32>, %arg1 : f64, %arg2 : f64) -> tensor<2x3xi8> { %empty = tensor.empty() : tensor<2x3xi32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1] + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<1x1x2x4xi32> -> tensor<2x3xi32> %0 = arith.mulf %arg1, %arg2 : f64 %1 = tensor.empty() : tensor<2x3xi8> @@ -1034,7 +1034,7 @@ util.func @move_captured_from_above_ops(%arg0 : tensor<1x1x2x4xi32>, // CHECK-LABEL: func public @move_captured_from_above_ops // CHECK: %[[OP:.+]] = arith.mulf // CHECK: %[[DISPATCH:.+]] = 
flow.dispatch.region -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[UNPACK]] : // CHECK: %[[TRUNCF:.+]] = arith.truncf %[[OP]] diff --git a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp index e2778cc8e2cc..2915fddcce52 100644 --- a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp +++ b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp @@ -510,6 +510,9 @@ void registerUtilExternalModels(DialectRegistry ®istry) { #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgOps.cpp.inc" >::registerOpInterface(context); + + AlwaysHoistableOpInterfaceHelper< + linalg::PackOp, linalg::UnPackOp>::registerOpInterface(context); }); // Register hoistable type interfaces for tensor ops. registry.addExtension( @@ -521,9 +524,8 @@ void registerUtilExternalModels(DialectRegistry ®istry) { tensor::ExtractSliceOp>::registerOpInterface(context); // Cases of trivial pack/unpack should be handled as canonicalizations // before we get here, thus we're safe to always hoist. - AlwaysHoistableOpInterfaceHelper< - tensor::PadOp, tensor::PackOp, - tensor::UnPackOp>::registerOpInterface(context); + AlwaysHoistableOpInterfaceHelper::registerOpInterface( + context); }); registry.addExtension( +[](MLIRContext *context, IREE::Util::UtilDialect *dialect) { diff --git a/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp b/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp index e01d71dd0d47..267c4f34ff47 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp @@ -27,10 +27,10 @@ struct DataLayoutPropagationPass patterns, [](OpOperand *opOperand) { Operation *producer = opOperand->get().getDefiningOp(); Operation *consumer = opOperand->getOwner(); - if (isa(consumer)) { + if (isa(consumer)) { return isa(producer); } - if (isa(producer)) { + if (isa(producer)) { return isa(consumer); } return false; diff --git a/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp b/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp index 0489448d9b0a..cbbbe5f4880c 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/GlobalOptimization/Passes.h" #include "llvm/ADT/TypeSwitch.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SCF/Transforms/Transforms.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" @@ -30,7 +31,7 @@ static bool isHoistableOp(LoopLikeOpInterface loopOp, Operation *op, } // Check if the op type is hoistable. 
-  if (!isa<tensor::PackOp, tensor::UnPackOp>(op)) {
+  if (!isa<linalg::PackOp, linalg::UnPackOp>(op)) {
     return false;
   }
diff --git a/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp b/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp
index 86d4135c636e..69a61842f7e8 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #include "iree/compiler/GlobalOptimization/Passes.h"
-#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -25,7 +25,7 @@ struct SimplifyPackUnpackPass
 void SimplifyPackUnpackPass::runOnOperation() {
   MLIRContext *context = &getContext();
   RewritePatternSet patterns(context);
-  tensor::populateSimplifyPackAndUnpackPatterns(patterns);
+  linalg::populateSimplifyPackAndUnpackPatterns(patterns);
   if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) {
     return signalPassFailure();
   }
 }
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir
index 556cfedf8fee..d05143897c03 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir
@@ -3,7 +3,7 @@
 func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) -> tensor {
   %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor
   %2 = tensor.empty(%dim) : tensor
-  %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor
+  %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor
   func.return %pack : tensor
 }
 // CHECK-LABEL: func.func @bubble_up_pack_through_collapse
@@ -12,7 +12,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index)
 // CHECK: %[[C0:.+]] = arith.constant 0 : index
 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor
 // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor
 // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor
 // CHECK: return %[[COLLAPSED]] : tensor
@@ -20,7 +20,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index)
 func.func @push_down_unpack_through_expand(%5: tensor, %dim: index, %1 : index) -> tensor {
   %6 = tensor.empty(%dim) : tensor
-  %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor
+  %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor
   %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%1, 256, 256]: tensor into tensor
   func.return %expanded : tensor
 }
@@ -31,5 +31,5 @@ func.func @push_down_unpack_through_expand(%5: tensor, %dim: index
 // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]]
output_shape {{.*}} : tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir index 9af32a090c76..2d5e95b6913f 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir +++ b/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir @@ -11,7 +11,7 @@ func.func @hoist_pack_op_with_zero_trip_check(%bound : i32, %src : tensor<100x10 } do { ^bb0(%arg1: i32, %arg2: tensor<13x13x8x8xf32>): %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %arg2, %pack : tensor<13x13x8x8xf32> %next = arith.addi %arg1, %cst1 : i32 scf.yield %next, %add : i32, tensor<13x13x8x8xf32> @@ -28,7 +28,7 @@ func.func @hoist_pack_op_with_zero_trip_check(%bound : i32, %src : tensor<100x10 // CHECK: %[[PRECOND:.+]] = arith.cmpi slt, %[[C0]], %[[BOUND]] : i32 // CHECK: %[[RES:.+]]:2 = scf.if %[[PRECOND]] -> (i32, tensor<13x13x8x8xf32>) { // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<13x13x8x8xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] padding_value(%[[PAD]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %[[DEST]] // CHECK-SAME: : tensor<100x100xf32> -> tensor<13x13x8x8xf32> // CHECK: %[[LOOP:.+]]:2 = scf.while (%[[ARG2:.+]] = %[[C0]], %[[ARG3:.+]] = %[[INIT]]) @@ -56,7 +56,7 @@ func.func @hoist_pack_op_from_do_while(%bound : i32, %src : tensor<100x100xf32>) %init = arith.constant dense<0.0> : tensor<13x13x8x8xf32> %res:2 = scf.while (%iter = %cst0, %val = %init) : (i32, tensor<13x13x8x8xf32>) -> (i32, tensor<13x13x8x8xf32>) { %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %val, %pack : tensor<13x13x8x8xf32> %next = arith.addi %iter, %cst1 : i32 %cond = arith.cmpi slt, %next, %bound : i32 @@ -76,7 +76,7 @@ func.func @hoist_pack_op_from_do_while(%bound : i32, %src : tensor<100x100xf32>) // CHECK-DAG: %[[INIT:.+]] = arith.constant dense<0.000000e+00> : tensor<13x13x8x8xf32> // CHECK-NOT: scf.if // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<13x13x8x8xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] padding_value(%[[PAD]] : f32) // CHECK: inner_dims_pos = [0, 1] 
inner_tiles = [8, 8] into %[[DEST]] : tensor<100x100xf32> -> tensor<13x13x8x8xf32> // CHECK: %[[LOOP:.+]]:2 = scf.while (%[[ARG2:.+]] = %[[C0]], %[[ARG3:.+]] = %[[INIT]]) // CHECK-SAME: (i32, tensor<13x13x8x8xf32>) -> (i32, tensor<13x13x8x8xf32>) { @@ -104,7 +104,7 @@ func.func @hoist_pack_op_with_zero_trip_check_in_outer_loop(%bound : i32, %src : } do { ^bb0(%arg1: i32, %arg2: tensor<13x13x8x8xf32>): %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %arg2, %pack : tensor<13x13x8x8xf32> %next = arith.addi %arg1, %cst1 : i32 scf.yield %next, %add : i32, tensor<13x13x8x8xf32> @@ -127,7 +127,7 @@ func.func @hoist_pack_op_with_zero_trip_check_in_outer_loop(%bound : i32, %src : // CHECK: scf.while // CHECK: scf.if // CHECK: tensor.empty -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: scf.while // CHECK: } do { // CHECK: } @@ -145,10 +145,10 @@ func.func @not_hoist_loop_variant(%bound : i32, %src : tensor<100x100xf32>) -> t %bias = arith.constant dense<1.0> : tensor<13x13x8x8xf32> %res:2 = scf.while (%iter = %cst0, %val = %src) : (i32, tensor<100x100xf32>) -> (i32, tensor<100x100xf32>) { %pack_dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %val padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %pack_dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %val padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %pack_dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %pack, %bias : tensor<13x13x8x8xf32> %unpack_dest = tensor.empty() : tensor<100x100xf32> - %unpack = tensor.unpack %add inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %unpack_dest : tensor<13x13x8x8xf32> -> tensor<100x100xf32> + %unpack = linalg.unpack %add inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %unpack_dest : tensor<13x13x8x8xf32> -> tensor<100x100xf32> %next = arith.addi %iter, %cst1 : i32 %cond = arith.cmpi slt, %next, %bound : i32 scf.condition(%cond) %next, %unpack : i32, tensor<100x100xf32> @@ -162,11 +162,11 @@ func.func @not_hoist_loop_variant(%bound : i32, %src : tensor<100x100xf32>) -> t // CHECK-LABEL: func.func @not_hoist_loop_variant // CHECK-DAG: %[[PACK_DEST:.+]] = tensor.empty // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty -// CHECK-NOT: tensor.pack -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.pack +// CHECK-NOT: linalg.unpack // CHECK: scf.while -// CHECK: tensor.pack {{.*}} into %[[PACK_DEST]] -// CHECK: tensor.unpack {{.*}} into %[[UNPACK_DEST]] +// CHECK: linalg.pack {{.*}} into %[[PACK_DEST]] +// CHECK: linalg.unpack {{.*}} into %[[UNPACK_DEST]] // CHECK: scf.condition // CHECK: } do { // CHECK: scf.yield @@ -182,7 +182,7 @@ func.func @not_hoist_from_nested_regions(%bound : i32, %flag : i1, %src : tensor %res:2 = scf.while (%iter = %cst0, %val = %init) : (i32, tensor<13x13x8x8xf32>) -> (i32, tensor<13x13x8x8xf32>) { %ifadd = scf.if %flag -> tensor<13x13x8x8xf32> { %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles 
= [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %val, %pack : tensor<13x13x8x8xf32> scf.yield %add : tensor<13x13x8x8xf32> } else { @@ -200,12 +200,12 @@ func.func @not_hoist_from_nested_regions(%bound : i32, %flag : i1, %src : tensor // CHECK-LABEL: func.func @not_hoist_from_nested_regions // CHECK-NOT: tensor.empty -// CHECK-NOT: tensor.pack -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.pack +// CHECK-NOT: linalg.unpack // CHECK: scf.while // CHECK: scf.if // CHECK: %[[PACK_DEST:.+]] = tensor.empty -// CHECK: tensor.pack {{.*}} into %[[PACK_DEST]] +// CHECK: linalg.pack {{.*}} into %[[PACK_DEST]] // CHECK: } else { // CHECK: scf.condition // CHECK: } do { diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir index 67e631518488..7f462b30f691 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir +++ b/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir @@ -100,7 +100,7 @@ module @hoist_constant_pack_computation { %pad = arith.constant 5 : i4 %val1 = stablehlo.constant dense<3> : tensor<7x15xi4> %val2 = tensor.empty() : tensor<4x1x16x2xi4> - %ret = tensor.pack %val1 padding_value(%pad : i4) inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %val2 : tensor<7x15xi4> -> tensor<4x1x16x2xi4> + %ret = linalg.pack %val1 padding_value(%pad : i4) inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %val2 : tensor<7x15xi4> -> tensor<4x1x16x2xi4> util.return %ret : tensor<4x1x16x2xi4> } } diff --git a/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp b/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp index 2ac2642717ec..88f6861076f0 100644 --- a/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp +++ b/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp @@ -180,9 +180,9 @@ getUnitOuterDimPackReassociationMap(SmallVector targetIndices, } // Transpose the given tensor based on the given transpose indices using a -// tensor.pack. Additionally returns a new AffineMap for the packed value +// linalg.pack. Additionally returns a new AffineMap for the packed value // assuming otherwise the same iteration space. -static std::tuple, AffineMap> +static std::tuple, AffineMap> createTransposeAsTensorPack( PatternRewriter &rewriter, Location loc, Value input, AffineMap inputMap, SmallVector targetIndices, int tilingFactor, @@ -209,10 +209,10 @@ createTransposeAsTensorPack( } // Pack the input tensor. - auto empty = tensor::PackOp::createDestinationTensor( + auto empty = linalg::PackOp::createDestinationTensor( rewriter, loc, input, transposedTileSizes, targetIndices, SmallVector{}); - auto packedInput = rewriter.create( + auto packedInput = rewriter.create( loc, input, empty, targetIndices, transposedTileSizes, /*padding=*/std::nullopt, SmallVector{}); @@ -220,7 +220,7 @@ createTransposeAsTensorPack( AffineMap transposedMap; Value packedOperand = packedInput; - // Collapse the unit dims created by tensor.pack if the pack is just a + // Collapse the unit dims created by linalg.pack if the pack is just a // transpose. if (tilingFactor <= 0) { auto reassociationMap = @@ -256,7 +256,7 @@ createTransposeAsTensorPack( // unit dimensions necessary for the unpack. 
static Value createTransposeAsTensorUnPack(PatternRewriter &rewriter, Location loc, Value output, - tensor::PackOp packOp, + linalg::PackOp packOp, int tilingFactor) { Value packedOutput = output; if (tilingFactor <= 0) { @@ -289,11 +289,11 @@ static Value createTransposeAsTensorUnPack(PatternRewriter &rewriter, .getResult(); } - Value empty = tensor::UnPackOp::createDestinationTensor( + Value empty = linalg::UnPackOp::createDestinationTensor( rewriter, loc, packedOutput, packOp.getMixedTiles(), packOp.getInnerDimsPos(), packOp.getOuterDimsPerm()); - auto unpackedOutput = rewriter.create( + auto unpackedOutput = rewriter.create( loc, packedOutput, empty, packOp.getInnerDimsPos(), packOp.getMixedTiles(), packOp.getOuterDimsPerm()); return unpackedOutput.getResult(); @@ -509,7 +509,7 @@ getTilingReassociationMap(const int64_t rank, SetTy innerDims) { // dims. Produces a transpose on the tiled dimensions followed by an // expand_shape to introduce the outer unit dims. For example, // -// tensor.pack inner_dims_pos = [1] inner_tiles = [64] +// linalg.pack inner_dims_pos = [1] inner_tiles = [64] // : tensor<32x64x16xf32> to tensor<32x1x16x64xf32> // // Generalizes to: @@ -517,14 +517,14 @@ getTilingReassociationMap(const int64_t rank, SetTy innerDims) { // linalg.transpose ... tensor<32x64x16xf32> to tensor<32x16x64xf32> // tensor.expand_shape ... tensor<32x16x64xf32> to tensor<32x1x16x64xf32> class GeneralizeOuterUnitDimsPackOp final - : public OpRewritePattern { + : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; GeneralizeOuterUnitDimsPackOp(MLIRContext *context, PatternBenefit benefit = 2) - : OpRewritePattern(context, benefit) {} + : OpRewritePattern(context, benefit) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { if (!packOp.getOuterDimsPerm().empty()) return failure(); @@ -591,7 +591,7 @@ class GeneralizeOuterUnitDimsPackOp final // and thus no padding. Produces a collapse_shape to remove the unit dimensions // followed by a transpose. For example: // -// tensor.unpack inner_dims_pos = [1] inner_tiles = [64] +// linalg.unpack inner_dims_pos = [1] inner_tiles = [64] // : tensor<32x1x16x64xf32> to tensor<32x64x16xf32> // // Generalizes to: @@ -599,14 +599,14 @@ class GeneralizeOuterUnitDimsPackOp final // tensor.collapse_shape ... tensor<32x1x16x64xf32> to tensor<32x16x64xf32> // linalg.transpose ... tensor<32x16x64xf32> to tensor<32x64x16xf32> class GeneralizeOuterUnitDimsUnPackOp final - : public OpRewritePattern { + : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; GeneralizeOuterUnitDimsUnPackOp(MLIRContext *context, PatternBenefit benefit = 2) - : OpRewritePattern(context, benefit) {} + : OpRewritePattern(context, benefit) {} - LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, + LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override { if (!unpackOp.getOuterDimsPerm().empty()) return failure(); @@ -697,8 +697,8 @@ class ConvertConvToChannelsLastPass // Run pack/unpack canonicalization to try to cancel any packs. 
     {
       RewritePatternSet patterns(context);
-      tensor::PackOp::getCanonicalizationPatterns(patterns, context);
-      tensor::UnPackOp::getCanonicalizationPatterns(patterns, context);
+      linalg::PackOp::getCanonicalizationPatterns(patterns, context);
+      linalg::UnPackOp::getCanonicalizationPatterns(patterns, context);
       linalg::FillOp::getCanonicalizationPatterns(patterns, context);
       if (failed(applyPatternsGreedily(op, std::move(patterns)))) {
         return signalPassFailure();
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir b/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir
index 67c66e58dc8d..813952d505f4 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir
+++ b/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir
@@ -44,9 +44,9 @@ util.func @conv_nchw_nhwc(%arg0: tensor<8x256x16x16xf32>, %arg1: tensor<16x256x3
 // TILE16: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2, d3, d8)>
 // TILE16-LABEL: util.func public @conv_nchw_nhwc
-// TILE16: %[[IMG:.+]] = tensor.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: %[[IMG:.+]] = linalg.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x256x16x16xf32> -> tensor<8x16x16x16x16xf32>
-// TILE16: %[[FILTER:.+]] = tensor.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
+// TILE16: %[[FILTER:.+]] = linalg.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
 // TILE16-SAME: tensor<16x256x3x3xf32> -> tensor<1x16x3x3x16x16xf32>
 // TILE16: %[[OUT:.+]] = linalg.transpose ins(%{{.*}} : tensor<8x16x14x14xf32>)
 // TILE16-SAME: outs(%{{.*}} : tensor<8x14x14x16xf32>) permutation = [0, 2, 3, 1]
@@ -104,17 +104,17 @@ module {
 // TILE16: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2, d3, d8)>
 // TILE16-LABEL: util.func public @generic_conv_nchw
-// TILE16: %[[IMG:.+]] = tensor.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: %[[IMG:.+]] = linalg.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x256x16x16xf32> -> tensor<8x16x16x16x16xf32>
-// TILE16: %[[FILTER:.+]] = tensor.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
+// TILE16: %[[FILTER:.+]] = linalg.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
 // TILE16-SAME: tensor<64x256x3x3xf32> -> tensor<4x16x3x3x16x16xf32>
-// TILE16: %[[OUT:.+]] = tensor.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: %[[OUT:.+]] = linalg.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x64x14x14xf32> -> tensor<8x4x14x14x16xf32>
 // TILE16: %[[TILED_CONV:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
 // TILE16-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction", "parallel"]}
 // TILE16-SAME: ins(%[[IMG]], %[[FILTER]] : tensor<8x16x16x16x16xf32>, tensor<4x16x3x3x16x16xf32>)
 // TILE16-SAME: outs(%[[OUT]] : tensor<8x4x14x14x16xf32>) {
-// TILE16: tensor.unpack %[[TILED_CONV]] inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: linalg.unpack %[[TILED_CONV]] inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x4x14x14x16xf32> -> tensor<8x64x14x14xf32>
 // -----
@@ -143,7 +143,7 @@ util.func @mmt_no_transpose(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x128
 util.func @test_unit_dims_pack(%arg0: tensor<10x20x5xf32>) -> tensor<1x1x5x20x10xf32> {
   %dst = tensor.empty() :
tensor<1x1x5x20x10xf32> - %packed = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [20, 10] + %packed = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [20, 10] into %dst : tensor<10x20x5xf32> -> tensor<1x1x5x20x10xf32> util.return %packed : tensor<1x1x5x20x10xf32> diff --git a/docs/website/docs/community/blog/posts/microkernels.md b/docs/website/docs/community/blog/posts/microkernels.md index 7e14195b0555..49667d136513 100644 --- a/docs/website/docs/community/blog/posts/microkernels.md +++ b/docs/website/docs/community/blog/posts/microkernels.md @@ -382,17 +382,17 @@ module attributes {hal.device.targets = [#device_target_llvm_cpu]} { %8 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor{%6, %7} %9 = affine.apply #map()[%0] %10 = tensor.empty(%9, %1) : tensor - %pack = tensor.pack %2 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %10 : tensor -> tensor + %pack = linalg.pack %2 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %10 : tensor -> tensor %11 = affine.apply #map()[%4] %12 = tensor.empty(%11, %3) : tensor - %pack_0 = tensor.pack %5 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %12 : tensor -> tensor + %pack_0 = linalg.pack %5 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %12 : tensor -> tensor %13 = affine.apply #map()[%6] %14 = affine.apply #map()[%7] %15 = tensor.empty(%13, %14) : tensor - %pack_1 = tensor.pack %8 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 : tensor -> tensor + %pack_1 = linalg.pack %8 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 : tensor -> tensor %16 = linalg.mmt4d ins(%pack, %pack_0 : tensor, tensor) outs(%pack_1 : tensor) -> tensor %17 = tensor.empty(%6, %7) : tensor - %unpack = tensor.unpack %16 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %17 : tensor -> tensor + %unpack = linalg.unpack %16 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %17 : tensor -> tensor %18 = hal.tensor.export %unpack "output0" : tensor{%6, %7} -> !hal.buffer_view return %18 : !hal.buffer_view } diff --git a/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir b/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir index 57b0782e22a0..b88b686b16ae 100644 --- a/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir +++ b/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir @@ -3,7 +3,7 @@ func.func @pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir() { dense<[[1, 2, 3, 4, 5, 6, 7, 8, 9]]> : tensor<1x9xi8> %empty = tensor.empty() : tensor<2x4x8x4xi8> %c0_i8 = arith.constant 0 : i8 - %pack = tensor.pack %iree_input padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] + %pack = linalg.pack %iree_input padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 4] into %empty : tensor<1x9xi8> -> tensor<2x4x8x4xi8> check.expect_eq_const(%pack, dense< diff --git a/tests/e2e/tensor_ops/pack.mlir b/tests/e2e/tensor_ops/pack.mlir index 98d5db6c2111..7ede2b15a88c 100644 --- a/tests/e2e/tensor_ops/pack.mlir +++ b/tests/e2e/tensor_ops/pack.mlir @@ -21,7 +21,7 @@ func.func private @generate_2D_source(%height : index, %width : index) -> tensor func.func 
@static_pack_simple() { %iree_input = util.unfoldable_constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32> %init = tensor.empty() : tensor<2x2x2x2xi32> - %pack = tensor.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init + %pack = linalg.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init : tensor<4x4xi32> -> tensor<2x2x2x2xi32> check.expect_eq_const(%pack, dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>) : tensor<2x2x2x2xi32> return @@ -41,7 +41,7 @@ func.func @dynamic_pack_simple() { %out_d0 = arith.ceildivui %in_d0, %c2 : index %out_d1 = arith.ceildivui %in_d1, %c2 : index %init = tensor.empty(%out_d0, %out_d1) : tensor - %pack = tensor.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init + %pack = linalg.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init : tensor -> tensor %cast = tensor.cast %pack : tensor to tensor<2x2x2x2xi32> check.expect_eq_const(%cast, dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>) : tensor<2x2x2x2xi32> @@ -52,7 +52,7 @@ func.func @static_pack_simple_pad_mode() { %iree_input = util.unfoldable_constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32> %pad = arith.constant 0 : i32 %init = tensor.empty() : tensor<2x2x3x3xi32> - %pack = tensor.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init + %pack = linalg.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init : tensor<4x4xi32> -> tensor<2x2x3x3xi32> // After padding, the input is // 0, 1, 2, 3, 0, 0 @@ -83,7 +83,7 @@ func.func @dynamic_pack_simple_pad_mode() { %out_d0 = arith.ceildivui %in_d0, %c3 : index %out_d1 = arith.ceildivui %in_d1, %c3 : index %init = tensor.empty(%out_d0, %out_d1) : tensor - %pack = tensor.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init + %pack = linalg.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init : tensor -> tensor %cast = tensor.cast %pack : tensor to tensor<2x2x3x3xi32> check.expect_eq_const(%cast, dense<[[[[0, 1, 2], [4, 5, 6], [8, 9, 10]], @@ -100,7 +100,7 @@ func.func @static_pack_large() { %source = tensor.cast %0 : tensor to tensor<128x256xi32> %init_pack = tensor.empty() : tensor<4x16x32x16xi32> - %pack = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack + %pack = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack : tensor<128x256xi32> -> tensor<4x16x32x16xi32> // Pack without padding is just a reshape followed by a transpose. 
@@ -121,7 +121,7 @@ func.func @static_pack_transpose_inner_dims_large() { %source = tensor.cast %0 : tensor to tensor<128x256xi32> %init_pack = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack + %pack = linalg.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<128x256xi32> -> tensor<4x16x16x32xi32> %reshape = tensor.expand_shape %source [[0, 1], [2, 3]] output_shape [4, 32, 16, 16] : tensor<128x256xi32> into tensor<4x32x16x16xi32> %init_transpose = tensor.empty() : tensor<4x16x16x32xi32> @@ -142,7 +142,7 @@ func.func @static_pack_pad_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<4x16x32x16xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack : tensor<100x250xi32> -> tensor<4x16x32x16xi32> @@ -169,7 +169,7 @@ func.func @static_pack_pad_transpose_outer_dims_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<16x4x32x16xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack : tensor<100x250xi32> -> tensor<16x4x32x16xi32> @@ -196,7 +196,7 @@ func.func @static_pack_pad_transpose_inner_dims_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<100x250xi32> -> tensor<4x16x16x32xi32> @@ -223,7 +223,7 @@ func.func @static_pack_pad_transpose_inner_and_outer_dims_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<16x4x16x32xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<100x250xi32> -> tensor<16x4x16x32xi32> diff --git a/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir b/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir index b3a29ebdf545..6b3156d25366 100644 --- a/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir +++ b/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir @@ -32,7 +32,7 @@ func.func @fully_dynamic_pack_simple() { %out_d0 = arith.ceildivui %in_d0, %c2 : index %out_d1 = arith.ceildivui %in_d1, %c2 : index %init = tensor.empty(%out_d0, %out_d1, %c2, %c2) : tensor - %pack = tensor.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [%c2, %c2] into %init + %pack = linalg.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [%c2, %c2] into %init : tensor -> tensor %cast = tensor.cast %pack : tensor to tensor<2x2x2x2xi32> check.expect_eq_const(%cast, dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>) : tensor<2x2x2x2xi32> @@ -50,7 +50,7 @@ func.func @fully_dynamic_pack_pad_transpose_inner_and_outer_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %init_pack = tensor.empty(%tiled_d1, %tiled_d0, %c16, %c32) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = 
linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [%c16, %c32] into %init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<16x4x16x32xi32> @@ -85,7 +85,7 @@ func.func @dynamic_pack_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack + %pack = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x32x16xi32> @@ -113,7 +113,7 @@ func.func @dynamic_pack_transpose_inner_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dyn_init_pack + %pack = linalg.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x16x32xi32> @@ -143,7 +143,7 @@ func.func @dynamic_pack_pad_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x32x16xi32> @@ -178,7 +178,7 @@ func.func @dynamic_pack_pad_transpose_outer_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d1, %tiled_d0) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<16x4x32x16xi32> @@ -213,7 +213,7 @@ func.func @dynamic_pack_pad_transpose_inner_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x16x32xi32> @@ -249,7 +249,7 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %init_pack = tensor.empty(%tiled_d1, %tiled_d0) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<16x4x16x32xi32> diff --git a/tests/e2e/tensor_ops/pack_i8.mlir b/tests/e2e/tensor_ops/pack_i8.mlir index cd8016906a43..07312c0ca711 100644 --- a/tests/e2e/tensor_ops/pack_i8.mlir +++ b/tests/e2e/tensor_ops/pack_i8.mlir @@ -30,7 +30,7 @@ func.func 
@static_pack_vnni_lhs_large() {
   %source = tensor.cast %0 : tensor<?x?xi8> to tensor<128x256xi8>
   %init_pack = tensor.empty() : tensor<8x128x16x2xi8>
-  %pack = tensor.pack %source
+  %pack = linalg.pack %source
       outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2]
       into %init_pack : tensor<128x256xi8> -> tensor<8x128x16x2xi8>
@@ -52,7 +52,7 @@ func.func @static_pack_vnni_rhs_large() {
   %source = tensor.cast %0 : tensor<?x?xi8> to tensor<256x512xi8>
   %init_pack = tensor.empty() : tensor<32x128x16x2xi8>
-  %pack = tensor.pack %source
+  %pack = linalg.pack %source
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2]
       into %init_pack : tensor<256x512xi8> -> tensor<32x128x16x2xi8>
@@ -75,7 +75,7 @@ func.func @static_pack_vnni_lhs_large_with_pad() {
   %c0_i8 = arith.constant 0 : i8
   %init_pack = tensor.empty() : tensor<8x128x16x2xi8>
-  %pack = tensor.pack %source padding_value(%c0_i8 : i8)
+  %pack = linalg.pack %source padding_value(%c0_i8 : i8)
       outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2]
       into %init_pack : tensor<127x255xi8> -> tensor<8x128x16x2xi8>
@@ -101,7 +101,7 @@ func.func @static_pack_vnni_rhs_large_with_pad() {
   %c0_i8 = arith.constant 0 : i8
   %init_pack = tensor.empty() : tensor<32x128x16x2xi8>
-  %pack = tensor.pack %source padding_value(%c0_i8 : i8)
+  %pack = linalg.pack %source padding_value(%c0_i8 : i8)
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2]
       into %init_pack : tensor<255x511xi8> -> tensor<32x128x16x2xi8>
diff --git a/tests/e2e/tensor_ops/unpack.mlir b/tests/e2e/tensor_ops/unpack.mlir
index e5b0df40a81d..8a8af619c693 100644
--- a/tests/e2e/tensor_ops/unpack.mlir
+++ b/tests/e2e/tensor_ops/unpack.mlir
@@ -24,7 +24,7 @@ func.func private @generate_4D_source(%d0: index, %d1: index, %d2: index, %d3: i
 func.func @static_unpack_simple() {
   %iree_input = util.unfoldable_constant dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>
   %init = tensor.empty() : tensor<4x4xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
       : tensor<2x2x2x2xi32> -> tensor<4x4xi32>
   check.expect_eq_const(%unpack, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
   return
@@ -40,7 +40,7 @@ func.func @dynamic_unpack_simple() {
   %out_d0 = arith.muli %in_d0, %c2 : index
   %out_d1 = arith.muli %in_d1, %c2 : index
   %init = tensor.empty(%out_d0, %out_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
       : tensor<?x?x2x2xi32> -> tensor<?x?xi32>
   %cast = tensor.cast %unpack : tensor<?x?xi32> to tensor<4x4xi32>
   check.expect_eq_const(%cast, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
@@ -53,7 +53,7 @@ func.func @static_unpack_simple_extract_slice() {
                                                  [[[12, 13, 14], [0, 0, 0], [0, 0, 0]], [[15, 0, 0], [0, 0, 0], [0, 0, 0]]]]> : tensor<2x2x3x3xi32>
   %init = tensor.empty() : tensor<4x4xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
       : tensor<2x2x3x3xi32> -> tensor<4x4xi32>
   check.expect_eq_const(%unpack, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
   return
@@ -75,7 +75,7 @@ func.func @dynamic_unpack_simple_extract_slice() {
   %out_d0 = arith.subi %full_out_d0, %c2 : index
   %out_d1 = arith.subi %full_out_d1, %c2 : index
   %init = tensor.empty(%out_d0, %out_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
       : tensor<?x?x3x3xi32> -> tensor<?x?xi32>
   %cast = tensor.cast %unpack : tensor<?x?xi32> to tensor<4x4xi32>
   check.expect_eq_const(%cast, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
@@ -91,7 +91,7 @@ func.func @static_unpack_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<4x16x32x16xi32>
   %init_unpack = tensor.empty() : tensor<128x256xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<4x16x32x16xi32> -> tensor<128x256xi32>
   %init_transpose = tensor.empty() : tensor<4x32x16x16xi32>
@@ -116,7 +116,7 @@ func.func @dynamic_unpack_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -144,7 +144,7 @@ func.func @dynamic_unpack_transpose_inner_dims_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -173,7 +173,7 @@ func.func @dynamic_unpack_transpose_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -202,7 +202,7 @@ func.func @dynamic_unpack_transpose_inner_and_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -229,7 +229,7 @@ func.func @static_unpack_extract_slice_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<4x16x32x16xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<4x16x32x16xi32> -> tensor<100x250xi32>
   %init_transpose = tensor.empty() : tensor<4x32x16x16xi32>
@@ -256,7 +256,7 @@ func.func @dynamic_unpack_extract_slice_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>
@@ -284,7 +284,7 @@ func.func @static_unpack_extract_slice_transpose_inner_dims_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<4x16x16x32xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<4x16x16x32xi32> -> tensor<100x250xi32>
@@ -310,7 +310,7 @@ func.func @static_unpack_extract_slice_transpose_outer_dims_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<16x4x32x16xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<16x4x32x16xi32> -> tensor<100x250xi32>
   %init_transpose = tensor.empty() : tensor<4x32x16x16xi32>
@@ -335,7 +335,7 @@ func.func @static_unpack_extract_slice_transpose_inner_and_outer_dims_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<16x4x16x32xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32]
       into %init_unpack : tensor<16x4x16x32xi32> -> tensor<100x250xi32>
@@ -364,7 +364,7 @@ func.func @dynamic_unpack_extract_slice_transpose_inner_dims_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>
@@ -395,7 +395,7 @@ func.func @dynamic_unpack_extract_slice_transpose_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>
@@ -425,7 +425,7 @@ func.func @dynamic_unpack_extract_slice_transpose_inner_and_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32]
       into %init_unpack : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>