diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td index 98a5fd278a997..f5cf53117d773 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -178,6 +178,16 @@ def LinalgConvolutionOpInterface : OpInterface<"ConvolutionOpInterface"> { ]; } +def LinalgRelayoutOpInterface : OpInterface<"RelayoutOpInterface"> { + let description = [{ + A Linalg relayout-op is either linalg.pack or linalg.unpack. + + While we could extend this interface with methods from Linalg_RelayoutOp, + this is currently not needed and left as a TODO. + }]; + let cppNamespace = "::mlir::linalg"; +} + def LinalgFillOpInterface : OpInterface<"FillOpInterface"> { let description = [{ A fill operation is defined in general terms: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td index 6fbc7c0c25e6f..a08a778fc25e1 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td @@ -22,6 +22,7 @@ include "mlir/Dialect/Linalg/IR/LinalgBase.td" include "mlir/Interfaces/DestinationStyleOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td" include "mlir/IR/OpAsmInterface.td" //===----------------------------------------------------------------------===// @@ -31,7 +32,7 @@ include "mlir/IR/OpAsmInterface.td" class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> : Op<Linalg_Dialect, mnemonic, !listconcat(traits, [ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>, - DestinationStyleOpInterface, + DestinationStyleOpInterface, LinalgRelayoutOpInterface, ConditionallySpeculatable, NoMemoryEffect, DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>, TypesMatchWith<"result type matches type of dest", diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index e86d175489775..12080cee85c9d 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -45,7 +45,7 @@ def ApplyDecomposeTensorPackUnpackPatternsOp : Op<Transform_Dialect, "apply_patterns.linalg.decompose_pack_unpack", [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { let description = [{ - Collect patterns to decompose tensor.pack and tensor.unpack into e.g. + Collect patterns to decompose linalg.pack and linalg.unpack into e.g. tensor::PadOp and linalg::TransposeOp ops. Requires all outer dims to be unit. }]; @@ -126,6 +126,28 @@ def ApplyPadVectorizationPatternsOp : Op<Transform_Dialect, +def ApplyFoldIntoPackAndUnpackPatternsOp : Op<Transform_Dialect, + "apply_patterns.linalg.fold_into_pack_and_unpack", + [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { + let description = [{ + Indicates that operations like tensor.pad and tensor.extract_slice should + be folded into linalg.pack and linalg.unpack operations, respectively. + }]; + + let assemblyFormat = "attr-dict"; +} + +def ApplyFoldPackUnpackIntoEmptyPatternsOp : Op<Transform_Dialect, + "apply_patterns.linalg.fold_pack_unpack_into_empty", + [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { + let description = [{ + Indicates that linalg.pack and linalg.unpack operations should be folded + away when their source is a tensor.empty, i.e. when the data being + re-laid-out is undefined. + }]; + + let arguments = (ins DefaultValuedAttr<BoolAttr, "false">:$fold_single_use_only); + let assemblyFormat = "attr-dict"; +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// @@ -547,19 +569,18 @@ def LowerPackOp : Op<Transform_Dialect, "structured.lower_pack", [ { let description = [{ - Rewrite a tensor.pack into tensor.pad + tensor.expand_shape + linalg.transpose. + Rewrite a linalg.pack into tensor.pad + tensor.expand_shape + linalg.transpose. #### Return modes - This operation ignores non-pack ops and drops them in the return.
- This operation produces a silenceable failure if the rewrite fails for any - reason. - If all the operations referred to by the `target` are rewritten, the - transform succeeds. - Return handles to the newly produced pad, expand_shape and transpose ops. + This operation ignores non-pack ops and drops them in the return. This + operation produces a silenceable failure if the rewrite fails for any + reason. If all the operations referred to by the `target` are rewritten, + the transform succeeds. Return handles to the newly produced pad, + expand_shape and transpose ops. }]; - let arguments = (ins Transform_ConcreteOpType<"tensor.pack">:$target, + let arguments = (ins Transform_ConcreteOpType<"linalg.pack">:$target, DefaultValuedAttr<BoolAttr, "true">:$lowerPadLikeWithInsertSlice); let results = (outs Transform_ConcreteOpType<"tensor.pad">:$pad_op, Transform_ConcreteOpType<"tensor.expand_shape">:$expand_shape_op, @@ -571,7 +592,7 @@ def LowerPackOp : Op { let description = [{ - Lower a tensor.unpack into empty + linalg.transpose + tensor.collapse_shape + + Lower a linalg.unpack into empty + linalg.transpose + tensor.collapse_shape + tensor.extract_slice. #### Return modes - This operation ignores non-unpack ops and drops them in the return. - This operation produces a silenceable failure if the rewrite fails for any - reason. - If all the operations referred to by the `target` are rewritten, the - transform succeeds. - Return handles to the newly produced empty, transpose, collapse_shape and extract_slice ops. + This operation ignores non-unpack ops and drops them in the return. This + operation produces a silenceable failure if the rewrite fails for any + reason. If all the operations referred to by the `target` are rewritten, + the transform succeeds. Return handles to the newly produced empty, + transpose, collapse_shape and extract_slice ops. }]; - let arguments = (ins Transform_ConcreteOpType<"tensor.unpack">:$target, + let arguments = (ins Transform_ConcreteOpType<"linalg.unpack">:$target, DefaultValuedAttr<BoolAttr, "true">:$lowerUnpadLikeWithExtractSlice); let results = (outs Transform_ConcreteOpType<"tensor.empty">:$empty_op, Transform_ConcreteOpType<"linalg.transpose">:$transpose_op, @@ -613,7 +633,7 @@ def LowerUnPackOp : Op, ReportTrackingListenerFailuresOpTrait]> { let description = [{ - Apply a transposition to a single `tensor.pack` (resp. `tensor.unpack`) and + Apply a transposition to a single `linalg.pack` (resp. `linalg.unpack`) and update the `linalg.generic` op that consumes (resp. produces) the operation. This transform allows composing a simple `structured.pack` with additional @@ -989,19 +1009,19 @@ def PackTransposeOp : Op diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ struct LowerPackResult { FailureOr<LowerPackResult> lowerPack(RewriterBase &rewriter, - tensor::PackOp packOp, + linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice = true); struct LowerUnPackOpResult { @@ -1133,14 +1133,14 @@ struct LowerUnPackOpResult { /// Rewrite unpack as empty + transpose + reshape + extract_slice. FailureOr<LowerUnPackOpResult> -lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp, +lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice = true); /// Struct to hold the result of a `pack` call. struct PackResult { - SmallVector<tensor::PackOp> packOps; + SmallVector<linalg::PackOp> packOps; linalg::LinalgOp packedLinalgOp; - SmallVector<tensor::UnPackOp> unPackOps; + SmallVector<linalg::UnPackOp> unPackOps; }; /// Implement packing of a single LinalgOp by `packedSizes`. /// There must be one packedSizes entry per `linalgOp` iterator.
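Since this patch only renames the ops, it may help to picture the rewrite that `lowerPack` performs. Below is a minimal sketch (shapes and SSA names are illustrative, not taken from this patch) for the case where the tile sizes divide the source shape evenly, so no tensor.pad is required:

```mlir
// Pack a 128x256 tensor into 4x8 inner tiles.
%pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 8]
    into %dest : tensor<128x256xf32> -> tensor<32x32x4x8xf32>

// After lowering: reshape to the tiled shape, then move the tile
// dimensions innermost via a transpose.
%expanded = tensor.expand_shape %src [[0, 1], [2, 3]] output_shape [32, 4, 32, 8]
    : tensor<128x256xf32> into tensor<32x4x32x8xf32>
%transposed = linalg.transpose ins(%expanded : tensor<32x4x32x8xf32>)
    outs(%dest : tensor<32x32x4x8xf32>) permutation = [0, 2, 1, 3]
```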
@@ -1150,9 +1150,9 @@ FailureOr<PackResult> pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, /// Struct to hold the result of a `packTranspose` call. struct PackTransposeResult { - tensor::PackOp transposedPackOp; + linalg::PackOp transposedPackOp; linalg::LinalgOp transposedLinalgOp; - tensor::UnPackOp transposedUnPackOp; + linalg::UnPackOp transposedUnPackOp; }; /// Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the /// transposed PackOp -> LinalgOp -> UnPackOp chain after replacements. @@ -1163,8 +1163,8 @@ struct PackTransposeResult { /// 3. `outerPerm` (resp. `innerPerm`) must be valid permutations of /// `packOp.getOuterDimsPerm` (resp. `packOp.getInnerDimsPerm`) or empty. FailureOr<PackTransposeResult> -packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, - linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp, +packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, + linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef<int64_t> outerPerm, ArrayRef<int64_t> innerPerm); /// Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m @@ -1525,15 +1525,15 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> { const SmallVector<Value> &dynSizes) const; }; -/// Rewrites a tensor::PackOp into a sequence of: +/// Rewrites a linalg::PackOp into a sequence of: /// * tensor::PadOp + linalg::TransposeOp + tensor::EmptyOp + /// tensor::InsertSliceOp ops. /// -/// Requires that all the outer dims of the input tensor::PackOp are 1. +/// Requires that all the outer dims of the input linalg::PackOp are 1. /// /// Before: /// ``` -/// %packed = tensor.pack %input +/// %packed = linalg.pack %input /// padding_value(%pad : f32) /// inner_dims_pos = [1, 0] /// inner_tiles = [2, %high] @@ -1559,20 +1559,20 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> { /// : tensor<2x?xf32> into tensor<1x1x2x?xf32> /// ``` struct DecomposeOuterUnitDimsPackOpPattern - : public OpRewritePattern<tensor::PackOp> { - using OpRewritePattern<tensor::PackOp>::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::PackOp packOp, + : public OpRewritePattern<linalg::PackOp> { + using OpRewritePattern<linalg::PackOp>::OpRewritePattern; + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override; }; -/// Rewrites a tensor::UnPackOp into a sequence of rank-reduced +/// Rewrites a linalg::UnPackOp into a sequence of rank-reduced /// * tensor::ExtractSliceOp + linalg::TransposeOp + tensor::InsertSliceOp /// -/// Requires that all the outer dims of the input tensor::PackOp are 1. +/// Requires that all the outer dims of the input linalg::UnPackOp are 1. /// /// Before: /// ``` -/// %packed = tensor.unpack %input +/// %packed = linalg.unpack %input /// inner_dims_pos = [1, 0] /// inner_tiles = [2, 8] /// into %output : tensor<1x1x2x8xf32> -> tensor<5x1xf32> @@ -1593,9 +1593,9 @@ struct DecomposeOuterUnitDimsPackOpPattern /// : tensor<8x2xf32> to tensor<5x1xf32> /// ``` struct DecomposeOuterUnitDimsUnPackOpPattern - : public OpRewritePattern<tensor::UnPackOp> { - using OpRewritePattern<tensor::UnPackOp>::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, + : public OpRewritePattern<linalg::UnPackOp> { + using OpRewritePattern<linalg::UnPackOp>::OpRewritePattern; + LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override; }; @@ -1717,7 +1717,7 @@ void populateLinalgGenericOpsSpecializationPatterns( void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); -/// Populates patterns to decompose tensor.pack and tensor.unpack Ops into e.g.
+/// Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g. /// tensor.pad, linalg.transpose, tensor.{insert|extract}_slice. Requires all /// outer dims to be unit. void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns); @@ -1779,7 +1779,7 @@ void populateElementwiseOpsFusionPatterns( RewritePatternSet &patterns, const ControlFusionFn &controlElementwiseOpFusion); -/// Function type which is used to control propagation of tensor.pack/unpack +/// Function type which is used to control propagation of linalg.pack/unpack /// ops. using ControlPropagationFn = std::function<bool(OpOperand *opOperand)>; @@ -1888,6 +1888,19 @@ void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns); /// convert to a `linalg.dot`. void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns); +/// Populates `patterns` with patterns that fold operations like `tensor.pad` +/// and `tensor.extract_slice` into `linalg.pack` and `linalg.unpack` +/// operations, respectively. +void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns); + +/// Populates `patterns` with patterns that fold `linalg.pack` and +/// `linalg.unpack` into `tensor.empty`. +void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns); + +/// Populates `patterns` with patterns that simplify `linalg.pack` and +/// `linalg.unpack` operations. +void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns); + } // namespace linalg } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 1e4f3004dec7e..80aa034d2199d 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -33,6 +33,24 @@ namespace linalg { //===----------------------------------------------------------------------===// // Utilities for inferring various semantics properties of Linalg ops. //===----------------------------------------------------------------------===// +/// Shell function to compute the destination permutation of a PackOp. +/// It uses the helper function `computePackUnPackPerm` to get the +/// permutation vector. The only major difference between unpack and pack is +/// that pack uses the destination rank, whereas unpack uses the source rank. +SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp); + +/// Shell function to compute the source permutation of an UnPackOp. +/// Like `getPackInverseDestPerm`, it uses the helper function +/// `computePackUnPackPerm` to get the permutation vector. The only major +/// difference between unpack and pack is that pack uses the destination rank, +/// whereas unpack uses the source rank. +SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp unpackOp); + +/// Shell function to compute the source-rank permutation of an UnPackOp. +/// Unpack requires some packing metadata, so this overload additionally +/// returns that metadata by reference.
+SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp, + PackingMetadata &metadata); //===----------------------------------------------------------------------===// // General utilities diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td index 81bab1b0c82f7..fcb10f55d556d 100644 --- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td +++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td @@ -53,16 +53,6 @@ def ApplyFoldTensorEmptyPatternsOp : Op:$fold_single_use_only); let assemblyFormat = "attr-dict"; } -def ApplyFoldIntoPackAndUnpackPatternsOp : Op<Transform_Dialect, - "apply_patterns.tensor.fold_into_pack_and_unpack", - [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { - let description = [{ - Indicates that operations like tensor.pad and tensor.extract_slice should - be folded into tensor.pack and tensor.unpack operations, respectively. - }]; - - let assemblyFormat = "attr-dict"; -} def ApplyFoldTensorSubsetOpsPatternsOp : Op; /// Populates `patterns` with patterns that replace tensor ops (such as diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 1aabb693de57c..075887ecc0afe 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -22,6 +22,7 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" @@ -847,7 +848,7 @@ struct FoldFillWithTensorExtract : public OpRewritePattern<tensor::ExtractOp> { /// 1. The pack op does not have a padding value, or /// 2. The filled value and padding value are the same. static FailureOr<FillOp> foldFillPackIntoFillOp(RewriterBase &rewriter, - tensor::PackOp packOp) { + linalg::PackOp packOp) { auto fillOp = packOp.getSource().getDefiningOp<FillOp>(); if (!fillOp) return failure(); @@ -865,12 +866,12 @@ static FailureOr<FillOp> foldFillPackIntoFillOp(RewriterBase &rewriter, } /// Wrapper pattern that applies the foldFillPackIntoFillOp method. -struct FoldFillWithPack : public OpRewritePattern<tensor::PackOp> { +struct FoldFillWithPack : public OpRewritePattern<linalg::PackOp> { public: FoldFillWithPack(MLIRContext *context) - : OpRewritePattern<tensor::PackOp>(context) {} + : OpRewritePattern<linalg::PackOp>(context) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { auto fillOp = foldFillPackIntoFillOp(rewriter, packOp); if (failed(fillOp)) @@ -3414,20 +3415,9 @@ FailureOr<TilingResult> WinogradOutputTransformOp::getTiledImplementation( //===----------------------------------------------------------------------===// // LinalgDialect +// TODO: Merge with the LinalgDialect block at the bottom //===----------------------------------------------------------------------===// -void LinalgDialect::getCanonicalizationPatterns( RewritePatternSet &results) const { - results.add<EraseDeadLinalgOp, FoldTensorCastConsumerOp, - InferStaticShapeOfOperands>(getContext()); -} - -Operation *LinalgDialect::materializeConstant(OpBuilder &builder, Attribute value, Type type, Location loc) { - return arith::ConstantOp::materialize(builder, value, type, loc); -} - // Returns true if the result expressions of `subMap` are a subset of `fullMap`.
static bool areResultExprsSubsetOf(AffineMap subMap, AffineMap fullMap) { auto explicitRange = subMap.getResults(); @@ -4064,6 +4054,78 @@ Speculation::Speculatability BatchMatmulOp::getSpeculatability() { //===----------------------------------------------------------------------===// // PackOp/UnPackOp Common //===----------------------------------------------------------------------===// +// FIXME: Duplicates similar hook from TensorOps.cpp! +bool foldTensorCastPrecondition(DestinationStyleOpInterface op) { + // Succeed only if at least one operand comes from a tensor::CastOp that + // can be folded into the consuming op. + bool hasTensorCastOperand = + llvm::any_of(op->getOpOperands(), [&](OpOperand &opOperand) { + if (llvm::isa<BlockArgument>(opOperand.get())) + return false; + auto castOp = opOperand.get().getDefiningOp<tensor::CastOp>(); + return castOp && canFoldIntoConsumerOp(castOp); + }); + + return hasTensorCastOperand; +} + +// FIXME: Duplicates similar hook from TensorOps.cpp! +static SmallVector<Value> getNewOperands(DestinationStyleOpInterface op, + SmallVector<Type> &newResTy) { + SmallVector<Value> newOperands; + newOperands.reserve(op->getNumOperands()); + + // Assumes that the result has dpsInits followed by nonDpsInits. + int64_t dpsInitIdx = 0; + for (OpOperand &opOperand : op->getOpOperands()) { + auto tensorCastOp = opOperand.get().getDefiningOp<tensor::CastOp>(); + bool fold = canFoldIntoConsumerOp(tensorCastOp); + newOperands.push_back(fold ? tensorCastOp.getOperand() : opOperand.get()); + if (op.isDpsInit(&opOperand) && + !llvm::isa<MemRefType>(newOperands.back().getType())) + newResTy[dpsInitIdx++] = newOperands.back().getType(); + } + return newOperands; } // Given the (potentially) updated packed type, `newPackedTy`, generates an // updated mixed-tile-sizes attribute. A tile size is updated only // when: // * a dim from newPackedTy is static, and // * the corresponding size from mixedTiles is still dynamic. // Otherwise, the original tile size is preserved. // Note - packed-type-dim and mixed-tile-size should always match! // // FIXME: Duplicates similar hook from TensorOps.cpp! +static SmallVector<OpFoldResult> +getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy, + SmallVector<OpFoldResult> mixedTiles) { + SmallVector<OpFoldResult> newMixedTileSizes; + for (auto it : llvm::zip(cast<ShapedType>(newPackedTy) .getShape() .take_back(mixedTiles.size()), + mixedTiles)) { + int64_t shape = std::get<0>(it); + if (shape == ShapedType::kDynamic) { + newMixedTileSizes.push_back(std::get<1>(it)); + continue; + } + + // The dim is static: keep the tile size if it is already a constant + // attribute, otherwise materialize the matching static size. + OpFoldResult tile = std::get<1>(it); + if (Attribute attr = llvm::dyn_cast_if_present<Attribute>(tile)) { + // Already a constant + newMixedTileSizes.push_back(tile); + } else { + assert(getConstantIntValue(tile).value() == shape && + "tile size and dim size don't match!"); + newMixedTileSizes.push_back( + (rewriter.getIntegerAttr(rewriter.getIndexType(), shape))); + } + } + + return newMixedTileSizes; } template <typename OpTy> static LogicalResult @@ -4757,6 +4819,58 @@ OpFoldResult PackOp::fold(FoldAdaptor adaptor) { return {}; } +/// Folds a tensor.cast op into a consuming PackOp if the +/// `tensor.cast` has a source that is more static than the consuming op. +/// +/// Example: +/// ```mlir +/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32> +/// %2 = linalg.pack %1 ... : tensor<?x?xf32> ... +/// ``` +/// +/// folds into: +/// +/// ```mlir +/// %2 = linalg.pack %0 ... : tensor<8x16xf32> ...
+/// ``` +struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> { + using OpRewritePattern<PackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(PackOp op, + PatternRewriter &rewriter) const override { + if (!foldTensorCastPrecondition(op)) + return failure(); + + SmallVector<Type> newResultTypes(op->getResultTypes()); + SmallVector<Value> newOperands = getNewOperands(op, newResultTypes); + + // Get the updated mixed-tile-sizes attribute. + SmallVector<OpFoldResult> newMixedTileSizes = + getNewMixedTileSizes(rewriter, newResultTypes[0], op.getMixedTiles()); + + // Clone op. + // TODO: Strictly speaking, discardable attributes should be _discarded_ at + // this point. However, in practice, we use them for things that we'd like + // to preserve. Implement a better abstraction. + PackOp newOp = rewriter.create<PackOp>( + op.getLoc(), newOperands[0], newOperands[1], op.getInnerDimsPos(), + newMixedTileSizes, op.getPaddingValue(), op.getOuterDimsPerm()); + newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary()); + + // Replace op. + Value oldResult = op.getResult(); + Value newResult = newOp.getResult(); + Value replacement = (newResult.getType() != oldResult.getType()) + ? rewriter.create<tensor::CastOp>( + op->getLoc(), oldResult.getType(), newResult) + : newResult; + + rewriter.replaceOp(op, {replacement}); + + return success(); + } +}; + //===----------------------------------------------------------------------===// // UnPackOp //===----------------------------------------------------------------------===// @@ -4972,5 +5086,74 @@ OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) { return {}; } +/// Folds a tensor.cast op into a consuming UnPackOp if the +/// `tensor.cast` has a source that is more static than the consuming op. +/// +/// Example: +/// ```mlir +/// %1 = tensor.cast %0 : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> +/// %2 = linalg.unpack %1 ... : tensor<1x1x?x1xi32> -> tensor<7x?xi32> +/// ``` +/// +/// folds into: +/// +/// ```mlir +/// %2 = linalg.unpack %0 ... tensor<1x1x8x1xi32> -> tensor<7x?xi32> +/// ``` +struct FoldTensorCastUnPackOp : public OpRewritePattern<UnPackOp> { + using OpRewritePattern<UnPackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(UnPackOp op, + PatternRewriter &rewriter) const override { + if (!foldTensorCastPrecondition(op)) + return failure(); + + SmallVector<Type> newResultTypes(op->getResultTypes()); + SmallVector<Value> newOperands = getNewOperands(op, newResultTypes); + Value sourceTensor = newOperands[0]; + + // Get the updated mixed-tile-sizes attribute. + SmallVector<OpFoldResult> newMixedTileSizes = getNewMixedTileSizes( + rewriter, sourceTensor.getType(), op.getMixedTiles()); + + // Clone op. + // TODO: Strictly speaking, discardable attributes should be _discarded_ at + // this point. However, in practice, we use them for things that we'd like + // to preserve. Implement a better abstraction. + UnPackOp newOp = rewriter.create<UnPackOp>( + op.getLoc(), sourceTensor, newOperands[1], op.getInnerDimsPos(), + newMixedTileSizes, op.getOuterDimsPerm()); + newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary()); + + // Replace op. + Value oldResult = op.getResult(); + Value newResult = newOp.getResult(); + Value replacement = (newResult.getType() != oldResult.getType()) ?
rewriter.create<tensor::CastOp>( + op->getLoc(), oldResult.getType(), newResult) + : newResult; + + rewriter.replaceOp(op, {replacement}); + + return success(); + } +}; + } // namespace linalg } // namespace mlir + +//===----------------------------------------------------------------------===// +// LinalgDialect +//===----------------------------------------------------------------------===// + +void LinalgDialect::getCanonicalizationPatterns( RewritePatternSet &results) const { + results.add<EraseDeadLinalgOp, FoldTensorCastConsumerOp, + InferStaticShapeOfOperands>(getContext()); +} + +Operation *LinalgDialect::materializeConstant(OpBuilder &builder, Attribute value, Type type, Location loc) { + return arith::ConstantOp::materialize(builder, value, type, loc); +} diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 51d1df52598c7..2f54e780093a2 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -267,6 +267,16 @@ void transform::ApplyPadVectorizationPatternsOp::populatePatterns( linalg::populatePadOpVectorizationPatterns(patterns); } +void transform::ApplyFoldIntoPackAndUnpackPatternsOp::populatePatterns( RewritePatternSet &patterns) { + linalg::populateFoldIntoPackAndUnpackPatterns(patterns); +} + +void transform::ApplyFoldPackUnpackIntoEmptyPatternsOp::populatePatterns( RewritePatternSet &patterns) { + linalg::populateFoldPackUnpackIntoTensorEmptyPatterns(patterns); +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// @@ -1170,7 +1180,7 @@ LogicalResult transform::InterchangeOp::verify() { //===----------------------------------------------------------------------===// DiagnosedSilenceableFailure transform::LowerPackOp::applyToOne( - transform::TransformRewriter &rewriter, tensor::PackOp target, + transform::TransformRewriter &rewriter, linalg::PackOp target, transform::ApplyToEachResultList &transformResults, transform::TransformState &state) { rewriter.setInsertionPoint(target); @@ -1192,7 +1202,7 @@ DiagnosedSilenceableFailure transform::LowerPackOp::applyToOne( //===----------------------------------------------------------------------===// DiagnosedSilenceableFailure transform::LowerUnPackOp::applyToOne( - transform::TransformRewriter &rewriter, tensor::UnPackOp target, + transform::TransformRewriter &rewriter, linalg::UnPackOp target, transform::ApplyToEachResultList &transformResults, transform::TransformState &state) { rewriter.setInsertionPoint(target); @@ -1622,7 +1632,7 @@ bool isValidPackingPermutation( RelayoutOpTy op, ArrayRef<int64_t> permutation, OuterOrInnerPerm outerOrInnerPerm = OuterOrInnerPerm::Outer) { static_assert( - llvm::is_one_of<RelayoutOpTy, tensor::PackOp, tensor::UnPackOp>::value, + llvm::is_one_of<RelayoutOpTy, linalg::PackOp, linalg::UnPackOp>::value, "applies to only pack or unpack operations"); if (!op || permutation.empty()) return true; if (outerOrInnerPerm == OuterOrInnerPerm::Inner) return permutation.size() == innerRank && isPermutationVector(permutation); // op.getOuterDimsPerm() may be empty, in which case it is identity. // Don't rely on it. - if (std::is_same<RelayoutOpTy, tensor::PackOp>::value) { + if (std::is_same<RelayoutOpTy, linalg::PackOp>::value) { return permutation.size() == op.getSourceRank() && isPermutationVector(permutation); } @@ -1665,11 +1675,11 @@ transform::PackTransposeOp::apply(transform::TransformRewriter &rewriter, } // Step 2.2. Fail on wrong type.
- auto packOp = dyn_cast<tensor::PackOp>(*packOrUnpackOps.begin()); - auto unPackOp = dyn_cast<tensor::UnPackOp>(*packOrUnpackOps.begin()); + auto packOp = dyn_cast<linalg::PackOp>(*packOrUnpackOps.begin()); + auto unPackOp = dyn_cast<linalg::UnPackOp>(*packOrUnpackOps.begin()); if ((!packOp && !unPackOp)) { return emitSilenceableError() << "requires target to map to a " - "tensor.pack or tensor.unpack"; + "linalg.pack or linalg.unpack"; } LinalgOp linalgOpTarget = dyn_cast<LinalgOp>(*linalgOps.begin()); if (!linalgOpTarget) @@ -1694,7 +1704,7 @@ transform::PackTransposeOp::apply(transform::TransformRewriter &rewriter, assert(!packOp && "packOp must be null on entry when unPackOp is not null"); OpOperand *packUse = linalgOp.getDpsInitOperand( cast<OpResult>(unPackOp.getSource()).getResultNumber()); - packOp = dyn_cast_or_null<tensor::PackOp>(packUse->get().getDefiningOp()); + packOp = dyn_cast_or_null<linalg::PackOp>(packUse->get().getDefiningOp()); if (!packOp || !packOp.getResult().hasOneUse()) return emitSilenceableError() << "could not find matching pack op"; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp index 7f9a0f7a6ca43..81842e4bea631 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp @@ -88,7 +88,7 @@ static bool validateFullTilesOnDims(linalg::LinalgOp linalgOp, /// Return failure or packed matmul with one of its operands transposed. static FailureOr<PackTransposeResult> transposePackedMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp, - tensor::PackOp packOp, AffineMap operandMap, + linalg::PackOp packOp, AffineMap operandMap, ArrayRef<int64_t> blocksStartDimPos, bool transposeOuterBlocks, bool transposeInnerBlocks) { assert(operandMap.getNumDims() >= 4 && diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 3594b08413812..d18b6f8afc43b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -26,6 +26,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms MeshShardingInterfaceImpl.cpp NamedOpConversions.cpp BlockPackMatmul.cpp + PackAndUnpackPatterns.cpp Padding.cpp Promotion.cpp RuntimeOpVerification.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp index d79399b6588be..d826f72afa1c1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -61,7 +61,7 @@ template <typename OpTy> static FailureOr<PackInfo> getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp, OpTy packOrUnPackOp) { - static_assert(llvm::is_one_of<OpTy, tensor::PackOp, tensor::UnPackOp>::value, + static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value, "applies to only pack or unpack operations"); LLVM_DEBUG( { llvm::dbgs() << "--- Construct PackInfo From an operand ---\n"; }); @@ -210,7 +210,7 @@ static SmallVector<int64_t> computeOuterDims(ArrayRef<int64_t> perm, /// %4 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %4 : f32 /// } -> tensor -/// %1 = tensor.pack %0 +/// %1 = linalg.pack %0 /// inner_dims_pos = [0, 1] /// inner_tiles = [8, 2] /// into %dest : tensor -> tensor @@ -219,7 +219,7 @@ static SmallVector<int64_t> computeOuterDims(ArrayRef<int64_t> perm, /// 8. Thus, the below operation and `affine_map<(d0, d1, d2, d3)> -> /// affine_map<(d1, d3)>` will be returned.
/// -/// %pack = tensor.pack %arg0 +/// %pack = linalg.pack %arg0 /// inner_dims_pos = [0] /// inner_tiles = [8] /// into %init : tensor -> tensor @@ -290,9 +290,9 @@ getOrCreatePackedViewOfOperand(OpBuilder &b, Location loc, PackInfo packInfo, if (innerDimsPos.empty() && outerDimsPerm.empty()) return std::make_tuple(opOperand->get(), indexingMap); - auto empty = tensor::PackOp::createDestinationTensor( + auto empty = linalg::PackOp::createDestinationTensor( b, loc, opOperand->get(), innerTileSizes, innerDimsPos, outerDimsPerm); - auto packedOperand = b.create<tensor::PackOp>( + auto packedOperand = b.create<linalg::PackOp>( loc, opOperand->get(), empty, innerDimsPos, innerTileSizes, /*padding=*/std::nullopt, outerDimsPerm); return std::make_tuple(packedOperand, indexingMap); @@ -327,7 +327,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, return newGenericOp; } -/// Bubbles up tensor.pack op through a producer generic op. This +/// Bubbles up linalg.pack op through a producer generic op. This /// swaps pack(generic) to generic(pack). The new generic op works on packed /// domain; pack ops are created for input and output operands. E.g., /// @@ -343,7 +343,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, /// %4 = arith.addf %arg3, %arg3 : f32 /// linalg.yield %4 : f32 /// } -> tensor -/// %4 = tensor.pack %3 +/// %4 = linalg.pack %3 /// inner_dims_pos = [0, 1] /// inner_tiles = [8, 2] /// into %dest : tensor -> tensor @@ -358,7 +358,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, /// %0 = affine.apply #map()[%dim] /// %1 = affine.apply #map1()[%dim_0] /// %2 = tensor.empty(%0, %1) : tensor -/// %pack = tensor.pack %arg0 +/// %pack = linalg.pack %arg0 /// inner_dims_pos = [0, 1] /// inner_tiles = [8, 2] /// into %2 : tensor -> tensor @@ -371,7 +371,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, /// linalg.yield %4 : f32 /// } -> tensor static FailureOr<GenericOp> -bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp, +bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, linalg::PackOp packOp, const ControlPropagationFn &controlFn) { auto genericOp = packOp.getSource().getDefiningOp<GenericOp>(); if (!genericOp) return failure(); @@ -404,11 +404,11 @@ bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp, rewriter.setInsertionPoint(genericOp); // We need to handle two cases: - // 1) The tensor.pack destination is a tensor.empty. If this is the case, we + // 1) The linalg.pack destination is a tensor.empty. If this is the case, we // create a new tensor.empty to avoid breaking dominance, as we are moving the - // tensor.pack above the linalg.generic. + // linalg.pack above the linalg.generic. // 2) The destination is not a tensor.empty. In this case we can replace only - // if the destination of the tensor.pack dominates the linalg.generic. + // if the destination of the linalg.pack dominates the linalg.generic. Value packOpDest = packOp.getDest(); if (!packOpDest.hasOneUse()) return failure(); @@ -453,13 +453,13 @@ bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp, /// Wrapper pattern that applies the bubbleUpPackOpThroughGenericOp method.
struct BubbleUpPackOpThroughGenericOpPattern - : public OpRewritePattern<tensor::PackOp> { + : public OpRewritePattern<linalg::PackOp> { public: BubbleUpPackOpThroughGenericOpPattern(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {} + : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { auto genericOp = bubbleUpPackOpThroughGenericOp(rewriter, packOp, controlFn); @@ -473,15 +473,15 @@ struct BubbleUpPackOpThroughGenericOpPattern ControlPropagationFn controlFn; }; -/// Propagate a tensor.pack operation up through a tensor.pad. The idea is to +/// Propagate a linalg.pack operation up through a tensor.pad. The idea is to /// add as many zero padding dimensions in `high` and `low` based on the number /// of point loops. -class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> { +class BubbleUpPackThroughPadOp final : public OpRewritePattern<linalg::PackOp> { public: BubbleUpPackThroughPadOp(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {} + : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>(); if (!padOp) return failure(); @@ -522,10 +522,10 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> { ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm(); SmallVector<OpFoldResult> mixedTiles = packOp.getMixedTiles(); - auto empty = tensor::PackOp::createDestinationTensor( + auto empty = linalg::PackOp::createDestinationTensor( rewriter, loc, padOp.getSource(), mixedTiles, innerDimsPos, outerDimsPerm); - auto sourcePack = rewriter.create<tensor::PackOp>( + auto sourcePack = rewriter.create<linalg::PackOp>( loc, padOp.getSource(), empty, innerDimsPos, mixedTiles, /*padding=*/std::nullopt, outerDimsPerm); @@ -549,9 +549,9 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> { // If the pad has more than one user, create an unpack on the new pad to // replace the other uses.
if (!padOp->hasOneUse()) { - auto unpackEmpty = tensor::UnPackOp::createDestinationTensor( + auto unpackEmpty = linalg::UnPackOp::createDestinationTensor( rewriter, loc, newPadOp, mixedTiles, innerDimsPos, outerDimsPerm); - Value unpackedPad = rewriter.create<tensor::UnPackOp>( + Value unpackedPad = rewriter.create<linalg::UnPackOp>( loc, newPadOp, unpackEmpty, innerDimsPos, mixedTiles, outerDimsPerm); rewriter.replaceAllUsesExcept(padOp, unpackedPad, sourcePack); } @@ -636,20 +636,20 @@ static int64_t applyPermutationAndReindexReassoc( /// /// %collapsed = tensor.collapse_shape %in [[0, 1], 2] /// : tensor into tensor -/// %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] +/// %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] /// inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %empty /// : tensor -> tensor /// /// can be transformed into: /// -/// %pack = tensor.pack %in outer_dims_perm = [1, 2] +/// %pack = linalg.pack %in outer_dims_perm = [1, 2] /// inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %empty /// : tensor -> tensor /// %collapsed = tensor.collapse_shape %pack [[0, 1], 2, 3, 4] /// : tensor into tensor static LogicalResult bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp, - tensor::PackOp packOp, + linalg::PackOp packOp, PatternRewriter &rewriter) { SmallVector<int64_t> innerTileSizes = packOp.getStaticTiles(); ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos(); ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm(); @@ -682,10 +682,10 @@ bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp, reassocIndices[outerPos].end()); } - auto emptyOp = tensor::PackOp::createDestinationTensor( + auto emptyOp = linalg::PackOp::createDestinationTensor( rewriter, packOp.getLoc(), collapseOp.getSrc(), packOp.getMixedTiles(), projectedInnerDimsPos, newOuterDimsPerm); - auto newPackOp = rewriter.create<tensor::PackOp>( + auto newPackOp = rewriter.create<linalg::PackOp>( packOp.getLoc(), collapseOp.getSrc(), emptyOp, projectedInnerDimsPos, packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPerm); @@ -742,20 +742,20 @@ projectDimsPosIntoReassocPos(ArrayRef<int64_t> dimsPos, /// /// %expand = tensor.expand_shape %in [[0], [1, 2]] /// : tensor into tensor -/// %pack = tensor.pack %expand outer_dims_perm = [0, 1] +/// %pack = linalg.pack %expand outer_dims_perm = [0, 1] /// inner_dims_pos = [2] inner_tiles = [8] into %empty /// : tensor -> tensor /// /// can be transformed into: /// -/// %pack = tensor.pack %in outer_dims_perm = [1, 2] +/// %pack = linalg.pack %in outer_dims_perm = [1, 2] /// inner_dims_pos = [1] inner_tiles = [8] into %empty /// : tensor -> tensor /// %expand = tensor.expand_shape %pack [[0], [1, 2], [3]] /// : tensor into tensor static LogicalResult bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, - tensor::PackOp packOp, + linalg::PackOp packOp, PatternRewriter &rewriter) { // Outer dimensions permutation is not supported currently. // TODO: Handle outer_dims_perm variants. @@ -808,7 +808,7 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, // If reassociation is not possible, then reordering cannot happen. // This can be caused by pack padding affecting previously expanded // dimensions or packing extending dimensions.
- RankedTensorType newPackType = tensor::PackOp::inferPackedType( + RankedTensorType newPackType = linalg::PackOp::inferPackedType( expandOp.getSrcType(), packOp.getStaticInnerTiles(), projectedInnerDimsPos, /*outerDimsPerm=*/SmallVector<int64_t>{}); auto reassocExpand = @@ -817,10 +817,10 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, return rewriter.notifyMatchFailure( packOp, "could not reassociate dims after bubbling up"); - Value destTensor = tensor::PackOp::createDestinationTensor( + Value destTensor = linalg::PackOp::createDestinationTensor( rewriter, packOp.getLoc(), expandOp.getSrc(), packOp.getMixedTiles(), projectedInnerDimsPos, /*outerDimsPerm=*/SmallVector<int64_t>{}); - Value packedVal = rewriter.create<tensor::PackOp>( + Value packedVal = rewriter.create<linalg::PackOp>( packOp.getLoc(), expandOp.getSrc(), destTensor, projectedInnerDimsPos, packOp.getMixedTiles(), packOp.getPaddingValue(), /*outerDimsPerm=*/SmallVector<int64_t>{}); @@ -833,12 +833,12 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, } class BubbleUpPackOpThroughReshapeOp final - : public OpRewritePattern<tensor::PackOp> { + : public OpRewritePattern<linalg::PackOp> { public: BubbleUpPackOpThroughReshapeOp(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {} + : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { Operation *srcOp = packOp.getSource().getDefiningOp(); // Currently only support when the pack op is the only user. @@ -877,7 +877,7 @@ class BubbleUpPackOpThroughReshapeOp final /// /// For example: /// -/// %unpack = tensor.unpack %in outer_dims_perm = [0, 1] +/// %unpack = linalg.unpack %in outer_dims_perm = [0, 1] /// inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %empty /// : tensor -> tensor /// %expanded = tensor.expand_shape %unpack [[0, 1], [2]] /// : tensor into tensor /// /// can be transformed into: /// /// %expanded = tensor.expand_shape %ain [[0, 1], [2], [3], [4]] /// : tensor into tensor -/// %unpack = tensor.unpack %expanded outer_dims_perm = [0, 1, 2] +/// %unpack = linalg.unpack %expanded outer_dims_perm = [0, 1, 2] /// inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %empty /// : tensor -> tensor static LogicalResult pushDownUnPackOpThroughExpandShape( - tensor::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp, + linalg::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp, PatternRewriter &rewriter, ControlPropagationFn controlFn) { // User controlled propagation function.
if (!controlFn(&expandOp.getSrcMutable())) return failure(); @@ -943,16 +943,16 @@ static LogicalResult pushDownUnPackOpThroughExpandShape( nextPos += 1; } - RankedTensorType newExpandType = tensor::PackOp::inferPackedType( + RankedTensorType newExpandType = linalg::PackOp::inferPackedType( expandTy, innerTileSizes, projectedInnerDimsPos, newOuterDimsPerm); auto newExpandOp = rewriter.create<tensor::ExpandShapeOp>( expandOp.getLoc(), newExpandType, unPackOp.getSource(), newReassocIndices); - auto emptyOp = tensor::UnPackOp::createDestinationTensor( + auto emptyOp = linalg::UnPackOp::createDestinationTensor( rewriter, unPackOp.getLoc(), newExpandOp, unPackOp.getMixedTiles(), projectedInnerDimsPos, newOuterDimsPerm); - auto newUnPackOp = rewriter.create<tensor::UnPackOp>( + auto newUnPackOp = rewriter.create<linalg::UnPackOp>( unPackOp.getLoc(), newExpandOp.getResult(), emptyOp, projectedInnerDimsPos, unPackOp.getMixedTiles(), newOuterDimsPerm); rewriter.replaceOp(expandOp, newUnPackOp); @@ -961,14 +961,14 @@ static LogicalResult pushDownUnPackOpThroughExpandShape( } class PushDownUnPackOpThroughReshapeOp final - : public OpRewritePattern<tensor::UnPackOp> { + : public OpRewritePattern<linalg::UnPackOp> { public: PushDownUnPackOpThroughReshapeOp(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::UnPackOp>(context), controlFn(std::move(fun)) { + : OpRewritePattern<linalg::UnPackOp>(context), controlFn(std::move(fun)) { } - LogicalResult matchAndRewrite(tensor::UnPackOp unPackOp, + LogicalResult matchAndRewrite(linalg::UnPackOp unPackOp, PatternRewriter &rewriter) const override { Value result = unPackOp.getResult(); // Currently only support unpack op with the single user. @@ -1001,7 +1001,7 @@ class PushDownUnPackOpThroughReshapeOp final static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) { OpOperand *unPackedOperand = nullptr; for (OpOperand &operand : genericOp->getOpOperands()) { - auto unPackOp = operand.get().getDefiningOp<tensor::UnPackOp>(); + auto unPackOp = operand.get().getDefiningOp<linalg::UnPackOp>(); if (!unPackOp) continue; if (unPackedOperand) @@ -1013,9 +1013,9 @@ static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) { return unPackedOperand; } -/// Push down a tensor.unpack op through a generic op. +/// Push down a linalg.unpack op through a generic op. /// The new generic op works on packed domain; pack ops are created for input -/// and output operands. A tensor.unpack op is inserted right after the packed +/// and output operands. A linalg.unpack op is inserted right after the packed /// generic. E.g. /// /// #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> /// /// %arg0 = tensor<12x2x56x56x32xf32> // packed arg. /// /// %0 = tensor.empty() : tensor<12x56x56x64xf32> -/// %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] +/// %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] /// inner_dims_pos = [3] inner_tiles = [32] into %0 /// %2 = linalg.generic {indexing_maps = [#map], /// iterator_types = ["parallel", "parallel", "parallel", "parallel"]} /// ins(...) outs(...) @@ -1044,7 +1044,7 @@ static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) { /// ^bb0(%out : f32): /// linalg.yield %out : f32 /// } -> tensor<12x2x56x56x32xf32> -/// %2 = tensor.unpack %1 outer_dims_perm = [0, 3, 1, 2] +/// %2 = linalg.unpack %1 outer_dims_perm = [0, 3, 1, 2] /// inner_dims_pos = [3] inner_tiles = [32] into %0 /// static FailureOr<std::tuple<GenericOp, Value>> @@ -1063,8 +1063,8 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp, OpOperand *unPackedOperand = *(maybeUnPackedOperand); // Extract packing information.
- tensor::UnPackOp producerUnPackOp = - unPackedOperand->get().getDefiningOp<tensor::UnPackOp>(); + linalg::UnPackOp producerUnPackOp = + unPackedOperand->get().getDefiningOp<linalg::UnPackOp>(); assert(producerUnPackOp && "expect a valid UnPackOp"); if (!controlFn(unPackedOperand)) return failure(); @@ -1079,7 +1079,7 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp, auto [packedOutOperand, packedOutIndexingMap] = getOrCreatePackedViewOfOperand(rewriter, genericOp.getLoc(), *packInfo, genericOp, genericOp.getDpsInitOperand(0)); - auto destPack = packedOutOperand.getDefiningOp<tensor::PackOp>(); + auto destPack = packedOutOperand.getDefiningOp<linalg::PackOp>(); // If the dps init operand of the generic is a tensor.empty, do not pack it // and forward the new tensor.empty as a destination. @@ -1108,7 +1108,7 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp, // Insert an unPackOp right after the packed generic. Value unPackOpRes = rewriter - .create<tensor::UnPackOp>(genericOp.getLoc(), newResult, + .create<linalg::UnPackOp>(genericOp.getLoc(), newResult, destPack.getSource(), innerDimsPos, mixedTiles, outerDimsPerm) .getResult(); @@ -1137,7 +1137,7 @@ struct PushDownUnPackOpThroughGenericOp : public OpRewritePattern<GenericOp> { ControlPropagationFn controlFn; }; -/// Propagate a tensor.unpack operation through a tensor.pad. The idea is to +/// Propagate a linalg.unpack operation through a tensor.pad. The idea is to /// add as many zero padding dimensions in `high` and `low` based on the number /// of point loops. struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> { using OpRewritePattern<tensor::PadOp>::OpRewritePattern; LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override { - tensor::UnPackOp unpackOp = - padOp.getSource().getDefiningOp<tensor::UnPackOp>(); + linalg::UnPackOp unpackOp = + padOp.getSource().getDefiningOp<linalg::UnPackOp>(); if (!unpackOp) return failure(); @@ -1185,12 +1185,12 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> { loc, /*result=*/Type(), unpackOp.getSource(), lowPad, highPad, paddingVal, padOp.getNofold()); - // Inject the tensor.unpack right after the packed padOp. + // Inject the linalg.unpack right after the packed padOp. Value outputUnPack = rewriter.create<tensor::EmptyOp>( loc, padOp.getResultType().getShape(), padOp.getResultType().getElementType()); - Value replacement = rewriter.create<tensor::UnPackOp>( + Value replacement = rewriter.create<linalg::UnPackOp>( loc, newPadOp.getResult(), outputUnPack, innerDimsPos, unpackOp.getMixedTiles(), outerDimsPerm); rewriter.replaceOp(padOp, replacement); diff --git a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp similarity index 90% rename from mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp rename to mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp index 3566714c6529e..0984b6988b93b 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp @@ -13,7 +13,7 @@ #include "mlir/IR/PatternMatch.h" namespace mlir { -namespace tensor { +namespace linalg { namespace { /// Returns the number of shape sizes that is either dynamic or greater than 1.
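As a concrete illustration of the folds collected in this renamed file, here is a rough before/after sketch of the `FoldPadWithPackOp` rewrite (hypothetical shapes; it assumes zero low padding and that the pad constant matches the pack's `padding_value`, mirroring the checks in the pattern below):

```mlir
// Before: an explicit pad feeding a pack that uses the same constant.
%cst = arith.constant 0.0 : f32
%padded = tensor.pad %src low[0, 0] high[2, 3] {
^bb0(%i: index, %j: index):
  tensor.yield %cst : f32
} : tensor<30x61xf32> to tensor<32x64xf32>
%packed = linalg.pack %padded padding_value(%cst : f32)
    inner_dims_pos = [0, 1] inner_tiles = [4, 8]
    into %dest : tensor<32x64xf32> -> tensor<8x8x4x8xf32>

// After: the pad is absorbed into the pack, which pads the
// incomplete tiles itself.
%packed = linalg.pack %src padding_value(%cst : f32)
    inner_dims_pos = [0, 1] inner_tiles = [4, 8]
    into %dest : tensor<30x61xf32> -> tensor<8x8x4x8xf32>
```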
@@ -201,7 +201,7 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> { LogicalResult matchAndRewrite(PackOp packOp, PatternRewriter &rewriter) const override { - auto padOp = packOp.getSource().getDefiningOp<PadOp>(); + auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>(); if (!padOp || padOp.getNofold() || !padOp.hasZeroLowPad()) return failure(); @@ -224,10 +224,11 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> { /// Fold an `unpack` -> `extract_slice` into the `unpack` since it already /// has extract_slice semantics. -struct FoldUnpackWithExtractSliceOp : public OpRewritePattern<ExtractSliceOp> { - using OpRewritePattern<ExtractSliceOp>::OpRewritePattern; +struct FoldUnpackWithExtractSliceOp + : public OpRewritePattern<tensor::ExtractSliceOp> { + using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern; - LogicalResult matchAndRewrite(ExtractSliceOp sliceOp, + LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const override { auto unpackOp = sliceOp.getSource().getDefiningOp<UnPackOp>(); if (!unpackOp) @@ -247,7 +248,7 @@ struct FoldUnpackWithExtractSliceOp : public OpRewritePattern<tensor::ExtractSliceOp> { // Create a new empty output tensor. Type elementType = unpackOp.getDestType().getElementType(); - Value output = rewriter.create<EmptyOp>( + Value output = rewriter.create<tensor::EmptyOp>( sliceOp.getLoc(), sliceOp.getMixedSizes(), elementType); rewriter.replaceOpWithNewOp<UnPackOp>( sliceOp, unpackOp.getSource(), output, unpackOp.getInnerDimsPos(), @@ -474,6 +475,50 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp return success(); } }; + +/// tensor.empty does not define any tensor contents, so an unpadded pack +/// can be folded away. +struct FoldEmptyTensorWithPackOp : public OpRewritePattern<PackOp> { + using OpRewritePattern<PackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(PackOp packOp, + PatternRewriter &rewriter) const override { + // Check for tensor.empty source. + auto emptyOp = packOp.getSource().getDefiningOp<tensor::EmptyOp>(); + if (!emptyOp) + return failure(); + + // Check for padding. + // Packing with padding cannot be simply removed. + if (packOp.getPaddingValue()) + return rewriter.notifyMatchFailure(packOp, "expects no padding value"); + + // Replace the pack directly with its destination. + rewriter.replaceOp(packOp, packOp.getDest()); + + return success(); + } +}; + +/// tensor.empty does not define any tensor contents, so an unpack +/// can be folded away. +struct FoldEmptyTensorWithUnPackOp : public OpRewritePattern<UnPackOp> { + using OpRewritePattern<UnPackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(UnPackOp unPackOp, + PatternRewriter &rewriter) const override { + // Check for tensor.empty source. + auto emptyOp = unPackOp.getSource().getDefiningOp<tensor::EmptyOp>(); + if (!emptyOp) + return failure(); + + // Replace the unpack directly with its destination.
+ rewriter.replaceOp(unPackOp, unPackOp.getDest()); + + return success(); + } +}; + } // namespace void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns) { @@ -490,5 +535,11 @@ void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns) { patterns.getContext()); } -} // namespace tensor +void populateFoldPackUnpackIntoTensorEmptyPatterns( RewritePatternSet &patterns) { + patterns.add<FoldEmptyTensorWithPackOp, FoldEmptyTensorWithUnPackOp>( + patterns.getContext()); +} + +} // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index b7764da26a7f4..faa7bbf9d168a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -10,14 +10,17 @@ #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Interfaces/TilingInterface.h" +#include "mlir/Interfaces/ValueBoundsOpInterface.h" #include <optional> using namespace mlir; @@ -563,6 +566,648 @@ struct LinalgOpPartialReductionInterface } }; +template <typename OpTy> +static SmallVector<Range> getPackUnPackIterationDomain(OpTy op, + OpBuilder &builder) { + static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value, + "applies to only pack or unpack operations"); + OpBuilder::InsertionGuard g(builder); + int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank() + : op.getDestRank(); + OpFoldResult zero = builder.getIndexAttr(0); + OpFoldResult one = builder.getIndexAttr(1); + ReifiedRankedShapedTypeDims resultShape; + (void)reifyResultShapes(builder, op, resultShape); + SmallVector<Range> loopBounds(rank); + for (auto dim : llvm::seq<int64_t>(0, rank)) { + loopBounds[dim].offset = zero; + loopBounds[dim].stride = one; + loopBounds[dim].size = resultShape[0][dim]; + } + return loopBounds; +} + +static void applyPermToRange(SmallVector<OpFoldResult> &offsets, + SmallVector<OpFoldResult> &sizes, + ArrayRef<int64_t> permutation) { + if (permutation.empty()) + return; + applyPermutationToVector(offsets, permutation); + applyPermutationToVector(sizes, permutation); +} + +struct PackOpTiling + : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> { + + SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const { + // Note that here we only consider untiled dimensions and outer tiled data + // dimensions, the inner tiled data dimensions are materialized when + // building the body of the operation. + auto packOp = cast<PackOp>(op); + SmallVector<utils::IteratorType> iteratorTypes( + packOp.getSourceRank(), utils::IteratorType::parallel); + return iteratorTypes; + } + + SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const { + return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b); + } + + FailureOr<TilingResult> + getTiledImplementation(Operation *op, OpBuilder &b, + ArrayRef<OpFoldResult> offsets, + ArrayRef<OpFoldResult> sizes) const { + auto packOp = cast<PackOp>(op); + Location loc = packOp.getLoc(); + + // The tiling is applied on interchanged dimensions. We have to undo the + // interchange to map sizes and offsets to the original input.
int64_t inputRank = packOp.getSourceRank(); + SmallVector<OpFoldResult> origOffsets(offsets); + SmallVector<OpFoldResult> origSizes(sizes); + applyPermToRange(origOffsets, origSizes, + invertPermutationVector(packOp.getOuterDimsPerm())); + + DenseMap<int64_t, OpFoldResult> dimAndTileMapping = + packOp.getDimAndTileMapping(); + SmallVector<OpFoldResult> srcDimValues = + tensor::getMixedSizes(b, loc, packOp.getSource()); + SmallVector<OpFoldResult> inputIndices, inputSizes; + for (auto dim : llvm::seq<int64_t>(0, inputRank)) { + using AV = affine::AffineValueExpr; + affine::AffineBuilder ab(b, loc); + AffineExpr dim0, dim1, sym; + bindDims(b.getContext(), dim0, dim1); + bindSymbols(b.getContext(), sym); + if (dimAndTileMapping.count(dim)) { + // If the data dimension is tiled, the i-th index is the product of + // offset_i and tile_i, and the i-th size is the product of sizes_i and + // tile_i. + auto avOffset = AV(dim0).bind(origOffsets[dim]); + auto avSize = AV(dim0).bind(origSizes[dim]); + auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]); + inputIndices.push_back(ab.mul(avOffset, avTileSize)); + inputSizes.push_back(ab.mul(avSize, avTileSize)); + } else { + inputIndices.push_back(origOffsets[dim]); + inputSizes.push_back(origSizes[dim]); + } + + // Limit the size of the input operand for incomplete tiles. + if (packOp.getPaddingValue()) { + OpFoldResult dimSize = srcDimValues[dim]; + auto avDimSize = AV(dim0).bind(dimSize); + auto avInputIdx = AV(dim1).bind(inputIndices.back()); + inputSizes.back() = + ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)}); + } + } + + auto oneAttr = b.getI64IntegerAttr(1); + SmallVector<OpFoldResult> strides(inputRank, oneAttr); + + SmallVector<Value> tiledOperands; + auto sourceSlice = b.create<tensor::ExtractSliceOp>( + loc, packOp.getSource(), inputIndices, inputSizes, strides); + tiledOperands.push_back(sourceSlice); + + SmallVector<OpFoldResult> outputOffsets, outputSizes; + if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets, + outputSizes))) + return {}; + + strides.append(packOp.getDestRank() - inputRank, oneAttr); + auto outSlice = b.create<tensor::ExtractSliceOp>( + loc, packOp.getDest(), outputOffsets, outputSizes, strides); + tiledOperands.push_back(outSlice); + + if (auto val = packOp.getPaddingValue()) + tiledOperands.push_back(val); + for (auto tile : packOp.getInnerTiles()) + tiledOperands.push_back(tile); + + Operation *tiledPackOp = b.create<PackOp>( + loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs()); + + return TilingResult{ + {tiledPackOp}, + SmallVector<Value>(tiledPackOp->getResults()), + llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})}; + } + + LogicalResult + getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber, + ArrayRef<OpFoldResult> offsets, + ArrayRef<OpFoldResult> sizes, + SmallVector<OpFoldResult> &resultOffsets, + SmallVector<OpFoldResult> &resultSizes) const { + // The iteration domain is over outer dimensions of packed layout. In this + // context, the outer dimensions of `resultOffsets` are `offsets`. The + // inner dimensions of `resultOffsets` are zeros because tiling is not + // applied to them.
+    auto packOp = cast(op);
+    int64_t inputRank = packOp.getSourceRank();
+    int64_t outputRank = packOp.getDestRank();
+    auto zeroAttr = b.getI64IntegerAttr(0);
+    resultOffsets.assign(offsets.begin(), offsets.end());
+    resultOffsets.append(outputRank - inputRank, zeroAttr);
+
+    ReifiedRankedShapedTypeDims outputShape;
+    (void)reifyResultShapes(b, packOp, outputShape);
+    resultSizes.assign(sizes.begin(), sizes.end());
+    for (auto dataTileDim : llvm::seq(inputRank, outputRank))
+      resultSizes.push_back(outputShape[0][dataTileDim]);
+
+    return success();
+  }
+
+  FailureOr
+  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
+                          ArrayRef offsets,
+                          ArrayRef sizes) const {
+    auto packOp = cast(op);
+    int64_t numTiles = packOp.getInnerDimsPos().size();
+
+    // The linalg.pack op is fusible (as a producer) only if full inner tiles
+    // are iterated or the inner dims are not tiled. Otherwise, it will
+    // generate a sequence of non-trivial ops (for partial tiles).
+    for (auto offset : offsets.take_back(numTiles))
+      if (!isConstantIntValue(offset, 0))
+        return failure();
+
+    for (auto iter :
+         llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
+      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
+        return failure();
+
+    FailureOr tilingResult = getTiledImplementation(
+        op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
+    if (failed(tilingResult))
+      return failure();
+    return tilingResult.value();
+  }
+
+  /// Method to return the position of the iteration domain tile computed by
+  /// the tiled operation. In the current `linalg.pack` context, the
+  /// `resultOffsets` and `resultSizes` only cover outer dimensions.
+  LogicalResult getIterationDomainTileFromOperandTile(
+      Operation *op, OpBuilder &b, unsigned operandNumber,
+      ArrayRef offsets, ArrayRef sizes,
+      SmallVectorImpl &resultOffsets,
+      SmallVectorImpl &resultSizes) const {
+    if (operandNumber != 0)
+      return failure();
+
+    auto packOp = cast(op);
+    // It is not trivial to infer the dest tile from the source tile if
+    // `packOp` has padding semantics.
+    if (packOp.getPaddingValue())
+      return failure();
+
+    Location loc = packOp.getLoc();
+
+    SmallVector outerDimOffsets, outerDimSizes;
+    DenseMap dimAndTileMapping =
+        packOp.getDimAndTileMapping();
+    for (auto dim : llvm::seq(packOp.getSourceRank())) {
+      if (dimAndTileMapping.count(dim)) {
+        FailureOr cstSize =
+            ValueBoundsConstraintSet::computeConstantBound(
+                presburger::BoundType::UB, sizes[dim],
+                /*stopCondition=*/nullptr, /*closedUB=*/true);
+        std::optional cstInnerSize =
+            getConstantIntValue(dimAndTileMapping[dim]);
+        // Currently, fusing `packOp` as a consumer expects a perfect tiling
+        // scenario, because even without padding semantics the `packOp` may
+        // yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>,
+        // where the `tileSize` from the operand of `packOp` is 5, which is
+        // not exactly divisible by the `innerTile` (=6) of `packOp`. As a
+        // result:
+        // 1. the first slice is extracted from (0) to (4) and inserted into
+        // (0,0)~(0,4) in the first row.
+        // 2. the second slice is extracted from (5) to (9) and SHOULD BE
+        // inserted into two rows of different lengths: the first row at (0,5)
+        // and the second row at (1,0)~(1,3). It is hard to coordinate the
+        // two, so we add the constraint below to bypass such cases
+        // temporarily. In other words, at the moment we can only support
+        // tiling with a consumer if the tile size for the producer is a
+        // multiple of the inner tile size for the packed dimensions.
+        if (failed(cstSize) || !cstInnerSize || *cstSize % *cstInnerSize != 0) {
+          return failure();
+        }
+
+        using AV = affine::AffineValueExpr;
+        affine::AffineBuilder ab(b, loc);
+        AffineExpr dim0, sym;
+        bindDims(b.getContext(), dim0);
+        bindSymbols(b.getContext(), sym);
+        auto avOffset = AV(dim0).bind(offsets[dim]);
+        auto avSize = AV(dim0).bind(sizes[dim]);
+        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
+        outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
+        outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
+      } else {
+        outerDimOffsets.push_back(offsets[dim]);
+        outerDimSizes.push_back(sizes[dim]);
+      }
+    }
+    applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
+    resultOffsets = outerDimOffsets;
+    resultSizes = outerDimSizes;
+    return success();
+  }
+
+  /// Method to return the tiled implementation of linalg.pack as a consumer.
+  FailureOr getTiledImplementationFromOperandTile(
+      Operation *op, OpBuilder &b, unsigned operandNumber,
+      ArrayRef offsets, ArrayRef sizes) const {
+    if (operandNumber != 0)
+      return failure();
+
+    auto packOp = cast(op);
+    Location loc = packOp.getLoc();
+
+    int64_t inputRank = packOp.getSourceRank();
+    auto oneAttr = b.getI64IntegerAttr(1);
+    SmallVector strides(inputRank, oneAttr);
+
+    SmallVector tiledOperands;
+    auto sourceSlice = b.create(
+        loc, packOp.getSource(), offsets, sizes, strides);
+    tiledOperands.push_back(sourceSlice);
+
+    SmallVector outerDimOffsets, outerDimSizes;
+    if (failed(getIterationDomainTileFromOperandTile(
+            op, b, /*operandNumber=*/0, offsets, sizes, outerDimOffsets,
+            outerDimSizes)))
+      return failure();
+
+    SmallVector outputOffsets, outputSizes;
+    if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
+                                     outputOffsets, outputSizes)))
+      return failure();
+
+    strides.append(packOp.getDestRank() - inputRank, oneAttr);
+    auto outSlice = b.create(
+        loc, packOp.getDest(), outputOffsets, outputSizes, strides);
+    tiledOperands.push_back(outSlice);
+
+    assert(!packOp.getPaddingValue() && "Expect no padding semantic");
+    for (auto tile : packOp.getInnerTiles())
+      tiledOperands.push_back(tile);
+
+    Operation *tiledPackOp = b.create(
+        loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
+
+    return TilingResult{
+        {tiledPackOp},
+        SmallVector(tiledPackOp->getResults()),
+        llvm::to_vector(ArrayRef{sourceSlice, outSlice})};
+  }
+};
+
+struct UnpackTileDimInfo {
+  bool isAlignedToInnerTileSize;
+  OpFoldResult sourceOffset;
+  OpFoldResult sourceSize;
+  OpFoldResult resultOffset;
+  OpFoldResult destExpandedSize;
+};
+
+/// Returns the information needed for tiling the unpack op on `tileDim` with
+/// the given `tileOffset` and `tileSize`. For more details, see the comment
+/// on `getTiledImplementation`.
+static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
+                                              int64_t tileDim,
+                                              OpFoldResult tileOffset,
+                                              OpFoldResult tileSize) {
+  UnpackTileDimInfo info;
+  Attribute zeroAttr = b.getIndexAttr(0);
+  Attribute oneAttr = b.getIndexAttr(1);
+  DenseMap dimAndTileMapping =
+      unpackOp.getDimAndTileMapping();
+  // The dimension is not one of the packed data dimensions.
+  if (!dimAndTileMapping.count(tileDim)) {
+    info.isAlignedToInnerTileSize = true;
+    info.sourceOffset = tileOffset;
+    info.sourceSize = tileSize;
+    info.resultOffset = zeroAttr;
+    info.destExpandedSize = tileSize;
+    return info;
+  }
+
+  Location loc = unpackOp.getLoc();
+  using AV = affine::AffineValueExpr;
+  affine::AffineBuilder ab(b, loc);
+  AffineExpr dim0, dim1, sym0;
+  bindDims(b.getContext(), dim0, dim1);
+  bindSymbols(b.getContext(), sym0);
+
+  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
+
+  info.isAlignedToInnerTileSize = false;
+  FailureOr cstSize = ValueBoundsConstraintSet::computeConstantBound(
+      presburger::BoundType::UB, tileSize,
+      /*stopCondition=*/nullptr, /*closedUB=*/true);
+  std::optional cstInnerSize = getConstantIntValue(innerTileSize);
+  if (!failed(cstSize) && cstInnerSize) {
+    if (*cstSize % *cstInnerSize == 0)
+      info.isAlignedToInnerTileSize = true;
+
+    // If the tiling size equals the inner tiling size, the outer dims are
+    // always 1.
+    if (*cstInnerSize == *cstSize) {
+      auto lhs = AV(dim0).bind(tileOffset);
+      auto rhs = AV(dim1).bind(innerTileSize);
+      info.sourceOffset = ab.floor(lhs, rhs);
+      info.sourceSize = oneAttr;
+      info.resultOffset = zeroAttr;
+      info.destExpandedSize = tileSize;
+      return info;
+    }
+  }
+
+  if (info.isAlignedToInnerTileSize) {
+    info.sourceOffset =
+        ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
+    info.resultOffset = zeroAttr;
+    info.destExpandedSize = tileSize;
+
+    // The ceilDiv is needed here because there can be an incomplete tile even
+    // in perfect tiling cases. E.g.,
+    //   %0 = unpack tensor<33x2xf32> into tensor<64xf32>
+    // If the tiling size is 32, there will be 3 tiles. Two of them have
+    // size=32; one of them has size=2. The size is represented using an
+    // affine_min op; we need ceilDiv.
+    info.sourceSize =
+        ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
+    return info;
+  }
+
+  affine::DivModValue firstCoord = affine::getDivMod(
+      b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
+      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+  OpFoldResult tileExclusiveBound =
+      ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
+  affine::DivModValue lastCoord = affine::getDivMod(
+      b, loc,
+      getValueOrCreateConstantIndexOp(
+          b, loc,
+          ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
+      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+
+  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
+                                       AV(dim1).bind(firstCoord.quotient));
+  info.sourceSize =
+      ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
+  info.sourceOffset = firstCoord.quotient;
+  info.resultOffset = firstCoord.remainder;
+  // Do not create affine ops for the expanded size because the resulting
+  // affine op would be too complicated and would trigger an issue in affine
+  // op simplification.
+  info.destExpandedSize = b.createOrFold(
+      loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
+      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+  return info;
+}
+
+struct UnPackOpTiling
+    : public TilingInterface::ExternalModel {
+
+  SmallVector getLoopIteratorTypes(Operation *op) const {
+    auto unpackOp = cast(op);
+    SmallVector iteratorTypes(
+        unpackOp.getDestRank(), utils::IteratorType::parallel);
+    return iteratorTypes;
+  }
+
+  SmallVector getIterationDomain(Operation *op, OpBuilder &b) const {
+    return getPackUnPackIterationDomain(cast(op), b);
+  }
+
+  /// There are two cases in tiling unpack ops. If the tiling size is aligned
+  /// to the inner tile size, the corresponding tiles of the source are all
+  /// complete. Otherwise, there are incomplete tiles. We will need to expand
+  /// the slice of the source to get complete tiles. The tiled unpack op
+  /// unpacks more data from the source, so we'll need an extract_slice op to
+  /// shift and truncate the output.
+  /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
+  /// coordinates of the second tile (i.e., result[15..31]) are
+  /// [(1, 7), (2, 0), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
+  /// row are incomplete tiles. To represent the unpack op, we have to
+  /// complete the rows. I.e., the input coordinates would start at (1, 0) and
+  /// end at (3, 7). In this context, the tiled unpack produces (3 * n)
+  /// elements because there are 3 rows in total. Followed by a
+  /// tensor.extract_slice op, we can get the actual result.
+  FailureOr
+  getTiledImplementation(Operation *op, OpBuilder &b,
+                         ArrayRef offsets,
+                         ArrayRef sizes) const {
+    auto unpackOp = cast(op);
+    int64_t srcRank = unpackOp.getSourceRank();
+    int64_t destRank = unpackOp.getDestRank();
+    int64_t numInnerTiles = srcRank - destRank;
+    Location loc = unpackOp.getLoc();
+
+    // The perfect tiling case indicates that the tiling sizes are multiples
+    // of inner_tile_size. In this context, no extra data is needed to
+    // represent the tiled unpack op.
+    bool isPerfectTilingCase = true;
+    Attribute oneAttr = b.getIndexAttr(1);
+    SmallVector sliceSrcStrides(destRank, oneAttr);
+    SmallVector sliceSrcIndices, sliceSrcSizes;
+    SmallVector destExpandedSizes, resultOffsetsFromDest;
+    for (auto dim : llvm::seq(0, destRank)) {
+      UnpackTileDimInfo info =
+          getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
+      if (!info.isAlignedToInnerTileSize)
+        isPerfectTilingCase = false;
+      sliceSrcIndices.push_back(info.sourceOffset);
+      sliceSrcSizes.push_back(info.sourceSize);
+      destExpandedSizes.push_back(info.destExpandedSize);
+      resultOffsetsFromDest.push_back(info.resultOffset);
+    }
+
+    // The tiling is applied on destination dimensions. We have to apply the
+    // interchange on source dimensions if outer_dims_perm is set.
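+    // E.g. (illustrative): with outer_dims_perm = [1, 0] on a 2-D dest, the
+    // source indices and sizes computed above are swapped before the inner
+    // tile dimensions are appended below.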
+ applyPermToRange(sliceSrcIndices, sliceSrcSizes, + unpackOp.getOuterDimsPerm()); + Attribute zeroAttr = b.getIndexAttr(0); + sliceSrcIndices.append(numInnerTiles, zeroAttr); + sliceSrcSizes.append(unpackOp.getMixedTiles()); + sliceSrcStrides.append(numInnerTiles, oneAttr); + SmallVector generatedSlices; + tensor::ExtractSliceOp sliceSource = b.create( + loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes, + sliceSrcStrides); + generatedSlices.push_back(sliceSource); + + SmallVector destStrides(destRank, oneAttr); + Value sliceDest; + if (isPerfectTilingCase) { + auto destSliceOp = b.create( + loc, unpackOp.getDest(), offsets, sizes, destStrides); + sliceDest = destSliceOp; + generatedSlices.push_back(destSliceOp); + } else { + sliceDest = b.create( + loc, destExpandedSizes, unpackOp.getDestType().getElementType()); + } + + SmallVector tiledOperands = {sliceSource.getResult(), sliceDest}; + for (auto tile : unpackOp.getInnerTiles()) + tiledOperands.push_back(tile); + + Operation *tiledUnpackOp = b.create( + loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs()); + + if (isPerfectTilingCase) + return TilingResult{{tiledUnpackOp}, + SmallVector(tiledUnpackOp->getResults()), + generatedSlices}; + + auto extractSlice = b.create( + loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes, + destStrides); + return TilingResult{ + {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices}; + } + + LogicalResult + getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber, + ArrayRef offsets, + ArrayRef sizes, + SmallVector &resultOffsets, + SmallVector &resultSizes) const { + resultOffsets = llvm::to_vector(offsets); + resultSizes = llvm::to_vector(sizes); + return success(); + } + + FailureOr + generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber, + ArrayRef offsets, + ArrayRef sizes) const { + FailureOr tilingResult = + getTiledImplementation(op, b, offsets, sizes); + if (failed(tilingResult)) + return failure(); + return tilingResult.value(); + } + + /// Method to return the position of iteration domain tile computed by the + /// tiled operation. + LogicalResult getIterationDomainTileFromOperandTile( + Operation *op, OpBuilder &b, unsigned operandNumber, + ArrayRef offsets, ArrayRef sizes, + SmallVectorImpl &resultOffsets, + SmallVectorImpl &resultSizes) const { + auto unPackOp = cast(op); + // If the operand tile is the dest, then no adjustment is needed. + if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) { + resultOffsets = llvm::to_vector(offsets); + resultSizes = llvm::to_vector(sizes); + return success(); + } + Location loc = unPackOp.getLoc(); + + int64_t numTiles = unPackOp.getInnerDimsPos().size(); + auto destOffsets = offsets.drop_back(numTiles); + auto destSizes = sizes.drop_back(numTiles); + // The tiling is applied on interchanged dimensions. We have to undo the + // interchange to map sizes and offsets to the original input. 
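+    // E.g. (illustrative): with numTiles = 2, the trailing two inner-tile
+    // entries of `offsets` and `sizes` are dropped; the remaining outer
+    // entries are mapped through the inverse of outer_dims_perm and then
+    // scaled by the inner tile sizes below.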
+    int64_t outputRank = unPackOp.getDestRank();
+    ReifiedRankedShapedTypeDims reifiedReturnShapes;
+    if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
+      return failure();
+    SmallVector outputMixedSizes = reifiedReturnShapes.front();
+    SmallVector origOffsets(destOffsets);
+    SmallVector origSizes(destSizes);
+    applyPermToRange(origOffsets, origSizes,
+                     invertPermutationVector(unPackOp.getOuterDimsPerm()));
+
+    DenseMap dimAndTileMapping =
+        unPackOp.getDimAndTileMapping();
+
+    for (auto dim : llvm::seq(0, outputRank)) {
+      using AV = affine::AffineValueExpr;
+      affine::AffineBuilder ab(b, loc);
+      AffineExpr dim0, dim1, sym0;
+      bindDims(b.getContext(), dim0, dim1);
+      bindSymbols(b.getContext(), sym0);
+      if (dimAndTileMapping.count(dim)) {
+        // If the data dimension is tiled, the i-th index is the product of
+        // offset_i and tile_i, and the i-th size is the product of sizes_i
+        // and tile_i. The sizes must be clamped to the sizes of the unpack
+        // result.
+        auto avOffset = AV(dim0).bind(origOffsets[dim]);
+        auto avSize = AV(dim0).bind(origSizes[dim]);
+        auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
+        auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
+        resultOffsets.push_back(ab.mul(avOffset, avTileSize));
+        auto avResultOffset = AV(dim1).bind(resultOffsets.back());
+        resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
+                                      ab.sub(avResultSize, avResultOffset)}));
+      } else {
+        resultOffsets.push_back(origOffsets[dim]);
+        resultSizes.push_back(origSizes[dim]);
+      }
+    }
+    return success();
+  }
+
+  /// Method to return the tiled implementation of linalg.unpack as a
+  /// consumer.
+  FailureOr getTiledImplementationFromOperandTile(
+      Operation *op, OpBuilder &b, unsigned operandNumber,
+      ArrayRef offsets, ArrayRef sizes) const {
+    auto unPackOp = cast(op);
+    // The linalg.unpack op is fusible (as a consumer) only if the inner dims
+    // are not tiled.
+    int64_t numTiles = unPackOp.getInnerDimsPos().size();
+    for (auto iter :
+         llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
+      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
+        return failure();
+    }
+
+    Location loc = unPackOp.getLoc();
+
+    // Fetch the offsets/sizes for creating the slice of the dest operand of
+    // the unpack op.
+    SmallVector outputOffsets, outputSizes;
+    if (failed(getIterationDomainTileFromOperandTile(
+            op, b, /*operandNumber=*/0, offsets, sizes, outputOffsets,
+            outputSizes)))
+      return failure();
+
+    auto oneAttr = b.getI64IntegerAttr(1);
+    int64_t outputRank = unPackOp.getDestRank();
+    SmallVector strides(outputRank, oneAttr);
+
+    SmallVector tiledOperands;
+    // Create the slice of the dest operand.
+    auto extractDestSlice = b.create(
+        loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
+    tiledOperands.push_back(extractDestSlice);
+
+    SmallVector inputOffsets, inputSizes;
+    strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
+    // Create the slice of the source operand.
+    auto extractSourceSlice = b.create(
+        loc, unPackOp.getSource(), offsets, sizes, strides);
+    tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
+    for (auto tile : unPackOp.getInnerTiles())
+      tiledOperands.push_back(tile);
+
+    // Create the tiled unpack op.
+ Operation *tiledUnPackOp = + b.create(loc, TypeRange{extractDestSlice.getType()}, + tiledOperands, op->getAttrs()); + + return TilingResult{{tiledUnPackOp}, + SmallVector(tiledUnPackOp->getResults()), + llvm::to_vector(ArrayRef{ + extractSourceSlice, extractDestSlice})}; + } +}; + } // namespace template @@ -584,8 +1229,18 @@ void mlir::linalg::registerTilingInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { registerOne(ctx); + linalg::PackOp::attachInterface(*ctx); + linalg::UnPackOp::attachInterface(*ctx); registerAll< #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >(ctx); }); } + +void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) { + linalg::PackOp::attachInterface(*ctx); + linalg::UnPackOp::attachInterface(*ctx); + }); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 50593b08ad74b..dcd50cc44f81b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -217,7 +217,7 @@ struct PackedOperandsDimList { } // namespace FailureOr linalg::lowerPack(RewriterBase &rewriter, - tensor::PackOp packOp, + linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice) { // 1. Filter out NYI cases. auto packedTensorType = @@ -238,7 +238,7 @@ FailureOr linalg::lowerPack(RewriterBase &rewriter, PackingMetadata packingMetadata = computePackingMetadata( packedTensorType.getRank(), packOp.getInnerDimsPos()); SmallVector packedToStripMinedShapePerm = - tensor::getPackInverseDestPerm(packOp); + getPackInverseDestPerm(packOp); // 3. Compute the stripMinedShape: this is the packed shape before any outer // or inner permutations have been applied. @@ -353,7 +353,7 @@ FailureOr linalg::lowerPack(RewriterBase &rewriter, } FailureOr -linalg::lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp, +linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice) { Location loc = unPackOp->getLoc(); OpBuilder::InsertionGuard g(rewriter); @@ -388,7 +388,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp, // before any outer or inner permutations have been applied. PackingMetadata packingMetadata; SmallVector packedToStripMinedShapePerm = - tensor::getUnPackInverseSrcPerm(unPackOp, packingMetadata); + getUnPackInverseSrcPerm(unPackOp, packingMetadata); // 2. Compute the stripMinedShape: this is the packed shape without outer and // inner permutations. @@ -493,8 +493,8 @@ FailureOr linalg::pack(RewriterBase &rewriter, llvm::interleaveComma(iteratorTypes, DBGS() << "iterators: "); DBGSNL();); - SmallVector packOps; - SmallVector unPackOps; + SmallVector packOps; + SmallVector unPackOps; // Step 1. Pack each dim of the LinalgOp metadata by packedSizes[i]. 
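  // E.g. (illustrative): for a linalg.matmul with iterators (m, n, k),
  // packedSizes = [32, 16, 64] packs m by 32, n by 16 and k by 64, matching
  // the MNK blocking exercised by the block-pack-matmul tests below.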
PackedOperandsDimList listOfPackedOperandsDim; for (int64_t i = 0, e = packedSizes.size(); i < e; ++i) { @@ -545,7 +545,7 @@ FailureOr linalg::pack(RewriterBase &rewriter, inputsAndInits.push_back(operand); continue; } - Value dest = tensor::PackOp::createDestinationTensor( + Value dest = linalg::PackOp::createDestinationTensor( rewriter, loc, operand, innerPackSizes, innerPos, /*outerDimsPerm=*/{}); ShapedType operandType = cast(operand.getType()); @@ -554,11 +554,11 @@ FailureOr linalg::pack(RewriterBase &rewriter, return getConstantIntValue(tile).has_value(); }); if (areConstantTiles && operandType.hasStaticShape() && - !tensor::PackOp::requirePaddingValue( + !linalg::PackOp::requirePaddingValue( operandType.getShape(), innerPos, cast(dest.getType()).getShape(), {}, innerPackSizes)) { - packOps.push_back(rewriter.create( + packOps.push_back(rewriter.create( loc, operand, dest, innerPos, innerPackSizes)); } else { // TODO: value of the padding attribute should be determined by @@ -566,7 +566,7 @@ FailureOr linalg::pack(RewriterBase &rewriter, auto zeroAttr = rewriter.getZeroAttr(getElementTypeOrSelf(dest.getType())); Value zero = rewriter.create(loc, zeroAttr); - packOps.push_back(rewriter.create( + packOps.push_back(rewriter.create( loc, operand, dest, innerPos, innerPackSizes, zero)); } inputsAndInits.push_back(packOps.back()); @@ -586,14 +586,14 @@ FailureOr linalg::pack(RewriterBase &rewriter, // Step 4. Propagate packing to all the op results. for (OpResult result : packedLinalgOp->getResults()) { int64_t resultNum = result.getResultNumber(); - tensor::PackOp maybePackedInit = - inits[resultNum].getDefiningOp(); + linalg::PackOp maybePackedInit = + inits[resultNum].getDefiningOp(); if (!maybePackedInit) { results.push_back(result); continue; } // Build the symmetrical UnPackOp to the existing PackOp. - unPackOps.push_back(rewriter.create( + unPackOps.push_back(rewriter.create( packedLinalgOp->getLoc(), result, maybePackedInit.getSource(), maybePackedInit.getInnerDimsPos(), maybePackedInit.getMixedTiles())); results.push_back(unPackOps.back()); @@ -674,15 +674,15 @@ static LinalgOp transposeOneLinalgOperandAndReplace( } FailureOr -linalg::packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, - linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp, +linalg::packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, + linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef outerPerm, ArrayRef innerPerm) { Location loc = linalgOp.getLoc(); // Step 1. Transpose packOp. rewriter.setInsertionPoint(packOp); - tensor::PackOp transposedPackOp = + linalg::PackOp transposedPackOp = packOp.createTransposedClone(rewriter, loc, innerPerm, outerPerm); if (!packOp.getResult().hasOneUse()) @@ -733,7 +733,7 @@ linalg::packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, rewriter, linalgOp, packUse, permutation, transposedPackOp.getResult()); // Step 3. Maybe transpose unPackOp. - tensor::UnPackOp transposedUnPackOp; + linalg::UnPackOp transposedUnPackOp; if (maybeUnPackOp) { OpOperand &opOperand = transposedLinalgOp->getOpOperand(packUseOperandNumber); @@ -1024,7 +1024,7 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite( /// /// This method assumes that all outer dims for this pack Op are 1. 
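 /// E.g. (illustrative): a pack producing tensor<1x1x32x64xf32> has outer
 /// dims [1, 1], satisfying this assumption.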
static Value getPackOpSourceOrPaddedSource(OpBuilder &builder, - tensor::PackOp packOp) { + linalg::PackOp packOp) { Value input = packOp.getSource(); if (!packOp.getPaddingValue()) { return input; @@ -1141,7 +1141,7 @@ getPackUnpackRankReducedPerm(ArrayRef shape, } LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite( - tensor::PackOp packOp, PatternRewriter &rewriter) const { + linalg::PackOp packOp, PatternRewriter &rewriter) const { // TODO: support the case that outer dimensions are not all 1s. A // tensor.expand_shape will be generated in this case. if (llvm::any_of(packOp.getAllOuterDims(), @@ -1242,7 +1242,7 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite( } LogicalResult DecomposeOuterUnitDimsUnPackOpPattern::matchAndRewrite( - tensor::UnPackOp unpackOp, PatternRewriter &rewriter) const { + linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const { int64_t srcRank = unpackOp.getSourceRank(); int64_t destRank = unpackOp.getDestRank(); ArrayRef srcShape = unpackOp.getSourceType().getShape(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index f2c23c49a78e8..ae04c2b6b2a5b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1499,11 +1499,11 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state, return success(); } -/// Given a tensor::PackOp, return the `dest` shape before any packing +/// Given a linalg::PackOp, return the `dest` shape before any packing /// permutations. -static SmallVector getTiledPackShape(tensor::PackOp packOp, +static SmallVector getTiledPackShape(linalg::PackOp packOp, ArrayRef destShape) { - return applyPermutation(destShape, tensor::getPackInverseDestPerm(packOp)); + return applyPermutation(destShape, linalg::getPackInverseDestPerm(packOp)); } /// Given an input, the mixed destSizes, and the vector sizes for vectorization, @@ -1558,7 +1558,7 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc, return write; } -/// Vectorize tensor::PackOp with (1) static innerTiles (2) constant +/// Vectorize linalg::PackOp with (1) static innerTiles (2) constant /// padding value and (3) input vector sizes into: /// masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds /// As in the following example: @@ -1585,7 +1585,7 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc, /// determined by the result tensor shape. Also, we update the inBounds /// attribute instead of masking. static LogicalResult -vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, +vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp, ArrayRef inputVectorSizes, SmallVectorImpl &newResults) { // TODO: Introduce a parent class that will handle the insertion point update. @@ -1639,7 +1639,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, // Create TransposeOp. 
auto destPermutation = - invertPermutationVector(tensor::getPackInverseDestPerm(packOp)); + invertPermutationVector(getPackInverseDestPerm(packOp)); auto transposeOp = rewriter.create( loc, shapeCastOp.getResult(), destPermutation); @@ -1651,7 +1651,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, return success(); } -/// Vectorize a `tensor::UnPackOp` to these 4 Ops: +/// Vectorize a `linalg::UnPackOp` to these 4 Ops: /// Vector::TransferReadOp - Reads a vector from the source tensor /// vector::TransposeOp - Transpose the Source tensor /// ShapeCastOp - Reshape the data based on the target. @@ -1661,7 +1661,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, /// * the vector sizes are determined by the input operand and attributes, /// * update the inBounds attribute instead of masking. static LogicalResult -vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp, +vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp, ArrayRef inputVectorSizes, SmallVectorImpl &newResults) { @@ -1754,7 +1754,7 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp, PackingMetadata packMetadata; SmallVector lastDimToInsertPosPerm = - tensor::getUnPackInverseSrcPerm(unpackOp, packMetadata); + getUnPackInverseSrcPerm(unpackOp, packMetadata); ShapedType maskedOpShapedType = cast(readResult.getType()); SmallVector stripMineShape(maskedOpShapedType.getShape()); mlir::Type stripMineElemType = maskedOpShapedType.getElementType(); @@ -1887,7 +1887,7 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op, /// Need to check if the inner-tiles are static/constant. static LogicalResult -vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp, +vectorizeUnPackOpPrecondition(linalg::UnPackOp unpackOp, ArrayRef inputVectorSizes) { if (llvm::any_of(unpackOp.getInnerTiles(), [](OpFoldResult res) { @@ -2007,7 +2007,7 @@ static LogicalResult vectorizeLinalgOpPrecondition( } static LogicalResult -vectorizePackOpPrecondition(tensor::PackOp packOp, +vectorizePackOpPrecondition(linalg::PackOp packOp, ArrayRef inputVectorSizes) { auto padValue = packOp.getPaddingValue(); Attribute cstAttr; @@ -2203,10 +2203,10 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition( .Case([&](auto padOp) { return vectorizePadOpPrecondition(padOp, inputVectorSizes); }) - .Case([&](auto packOp) { + .Case([&](auto packOp) { return vectorizePackOpPrecondition(packOp, inputVectorSizes); }) - .Case([&](auto unpackOp) { + .Case([&](auto unpackOp) { return vectorizeUnPackOpPrecondition(unpackOp, inputVectorSizes); }) .Case([&](auto sliceOp) { @@ -2231,7 +2231,7 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) { } bool mlir::linalg::hasVectorizationImpl(Operation *op) { - return isa(op); } @@ -2308,18 +2308,18 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op, return vectorizeAsTensorPadOp(rewriter, padOp, inputVectorSizes, results); }) - .Case([&](auto packOp) { + .Case([&](auto packOp) { return vectorizeAsTensorPackOp(rewriter, packOp, inputVectorSizes, results); }) + .Case([&](auto unpackOp) { + return vectorizeAsTensorUnpackOp(rewriter, unpackOp, + inputVectorSizes, results); + }) .Case([&](auto sliceOp) { return vectorizeAsInsertSliceOp(rewriter, sliceOp, inputVectorSizes, results); }) - .Case([&](auto unpackOp) { - return vectorizeAsTensorUnpackOp(rewriter, unpackOp, - inputVectorSizes, results); - }) .Default([](auto) { return failure(); }); if (failed(vectorizeResult)) { 
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index d148067fe6343..d3d301ca093b1 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -142,10 +142,64 @@ static void unpackRanges(OpBuilder &builder, Location loc, //===----------------------------------------------------------------------===// // General utilities //===----------------------------------------------------------------------===// +// +/// The permutation can be obtained from two permutations: +/// a) Compute the permutation vector to move the last `numPackedDims` into +/// the `innerPosDims` of a shape of rank `rank`. +/// b) Compute the permutation vector to move outer dims if the +/// `outerPerm` parameter is not empty. +/// Apply (b) permutation on (a) permutation to get the final permutation. +static SmallVector +computePackUnPackPerm(int64_t rank, ArrayRef &innerDimsPos, + ArrayRef &outerPerm, + PackingMetadata &packingMetadata) { + int64_t numPackedDims = innerDimsPos.size(); + auto lastDims = + llvm::to_vector(llvm::seq(rank - numPackedDims, rank)); + packingMetadata = computePackingMetadata(rank, innerDimsPos); + SmallVector innerPositionsPerm = + computePermutationVector(rank, lastDims, packingMetadata.insertPositions); + + SmallVector outerPos = packingMetadata.outerPositions; + if (!outerPerm.empty()) + applyPermutationToVector(outerPos, outerPerm); + SmallVector outerPositionPerm = + computePermutationVector(rank, packingMetadata.outerPositions, outerPos); + + SmallVector packInverseDestPermutation = innerPositionsPerm; + applyPermutationToVector(packInverseDestPermutation, outerPositionPerm); + return packInverseDestPermutation; +} namespace mlir { namespace linalg { +SmallVector getPackInverseDestPerm(PackOp packOp) { + + PackingMetadata pMetadata; + int64_t packedRank = packOp.getDestType().getRank(); + ArrayRef innerDimPos = packOp.getInnerDimsPos(); + ArrayRef outerPerm = packOp.getOuterDimsPerm(); + SmallVector packInvDestPerm = + computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata); + return packInvDestPerm; +} + +SmallVector getUnPackInverseSrcPerm(UnPackOp unpackOp) { + PackingMetadata metadata; + return getUnPackInverseSrcPerm(unpackOp, metadata); +} + +SmallVector getUnPackInverseSrcPerm(UnPackOp unpackOp, + PackingMetadata &metadata) { + int64_t unpackRank = unpackOp.getSourceType().getRank(); + ArrayRef innerDimPos = unpackOp.getInnerDimsPos(); + ArrayRef outerPerm = unpackOp.getOuterDimsPerm(); + SmallVector unpackInvSrcPerm = + computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata); + return unpackInvSrcPerm; +} + bool allIndexingsAreProjectedPermutation(LinalgOp op) { return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) { return m.isProjectedPermutation(/*allowZeroInResults=*/true); diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index d8d52a80a9645..b29d35f446694 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -10,7 +10,9 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" @@ -4490,8 +4492,8 @@ LogicalResult 
PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
 template 
 static bool isLikePadUnPad(PackOrUnpackOp packOp,
                            RankedTensorType packedTensorType) {
-  static_assert(std::is_same::value ||
-                    std::is_same::value,
+  static_assert(std::is_same::value ||
+                    std::is_same::value,
                 "Function meant for pack/unpack");
   // This is a pad if packing only adds ones and we don't transpose dimensions.

@@ -4692,7 +4694,7 @@ static bool inferStaticShape(UnPackOp op, SmallVectorImpl &srcShape,
 LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
                                      PatternRewriter &rewriter) {
   /// unpack(pack(x)) -> x
-  if (PackOp packOp = unPackOp.getSource().getDefiningOp()) {
+  if (PackOp packOp = unPackOp.getSource().getDefiningOp()) {
     if (packOp.getSourceType() != unPackOp.getDestType())
       return failure();
     if (packOp.getPaddingValue() ||
@@ -4728,7 +4730,7 @@ LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
       dest =
           rewriter.create(loc, newDestType, unPackOp.getDest());
     }
-    Value newOp = rewriter.create(
+    Value newOp = rewriter.create(
         loc, source, dest, unPackOp.getInnerDimsPos(), unPackOp.getMixedTiles(),
         unPackOp.getOuterDimsPerm());
     rewriter.replaceOpWithNewOp(
@@ -4831,7 +4833,7 @@ getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy,
   return newMixedTileSizes;
 }

-/// Folds a tensor.cast op into a consuming tensor::PackOp op if the
+/// Folds a tensor.cast op into a consuming PackOp if the
 /// `tensor.cast` has source that is more static than the consuming op.
 ///
 /// Example:
@@ -4883,7 +4885,7 @@ struct FoldTensorCastPackOp : public OpRewritePattern {
   }
 };

-/// Folds a tensor.cast op into a consuming tensor::UnPackOp op if the
+/// Folds a tensor.cast op into a consuming UnPackOp if the
 /// `tensor.cast` has source that is more static than the consuming op.
 ///
 /// Example:
@@ -4960,9 +4962,11 @@ struct FoldTensorCastProducerOp
   LogicalResult matchAndRewrite(DestinationStyleOpInterface op,
                                 PatternRewriter &rewriter) const override {

-    // Reject tensor::PackOp - there's dedicated pattern for that instead.
+    // Reject PackOp/UnPackOp (i.e. RelayoutOps) - there are dedicated patterns
+    // for those instead.
if (!foldTensorCastPrecondition(op) || - isa(*op)) + isa(*op) || + isa(*op)) return failure(); SmallVector newResultTypes(op->getResultTypes()); diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp index 052dee402b79e..bd1a09be6b9bc 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @@ -118,7 +118,7 @@ static void applyPermToRange(SmallVector &offsets, } struct PackOpTiling - : public TilingInterface::ExternalModel { + : public TilingInterface::ExternalModel { SmallVector getLoopIteratorTypes(Operation *op) const { // Note that here we only consider untiled dimensions and outer tiled data @@ -491,7 +491,7 @@ static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp, } struct UnPackOpTiling - : public TilingInterface::ExternalModel { + : public TilingInterface::ExternalModel { SmallVector getLoopIteratorTypes(Operation *op) const { auto unpackOp = cast(op); diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp index 99199252710f9..f3560d08ff769 100644 --- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp +++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp @@ -100,11 +100,6 @@ void transform::ApplyFoldTensorEmptyPatternsOp::populatePatterns( tensor::populateFoldTensorEmptyPatterns(patterns, getFoldSingleUseOnly()); } -void transform::ApplyFoldIntoPackAndUnpackPatternsOp::populatePatterns( - RewritePatternSet &patterns) { - tensor::populateFoldIntoPackAndUnpackPatterns(patterns); -} - void transform::ApplyFoldTensorSubsetOpsPatternsOp::populatePatterns( RewritePatternSet &patterns) { tensor::populateFoldTensorSubsetOpPatterns(patterns); diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt index cc6275fee671a..7880d1c5a0c5d 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt @@ -6,7 +6,6 @@ add_mlir_dialect_library(MLIRTensorTransforms FoldTensorSubsetOps.cpp IndependenceTransforms.cpp MergeConsecutiveInsertExtractSlicePatterns.cpp - PackAndUnpackPatterns.cpp ReshapePatterns.cpp RewriteAsConstant.cpp SwapExtractSliceWithProducerPatterns.cpp diff --git a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp index 60b0c3e759b6c..fa748cf01977f 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp @@ -93,49 +93,6 @@ struct FoldEmptyTensorWithExtractSliceOp bool foldSingleUseOnly = false; }; -/// tensor.empty does not define any tensor contents, so an unpadded pack -/// can be folded away. -struct FoldEmptyTensorWithPackOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PackOp packOp, - PatternRewriter &rewriter) const override { - // Check for tensor.empty source. - auto emptyOp = packOp.getSource().getDefiningOp(); - if (!emptyOp) - return failure(); - - // Check for padding. - // Packing with padding cannot be simply removed. - if (packOp.getPaddingValue()) - return rewriter.notifyMatchFailure(packOp, "expects no padding value"); - - // Replace the pack directly with its destination. 
- rewriter.replaceOp(packOp, packOp.getDest()); - - return success(); - } -}; - -/// tensor.empty does not define any tensor contents, so an unpack -/// can be folded away. -struct FoldEmptyTensorWithUnPackOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(UnPackOp unPackOp, - PatternRewriter &rewriter) const override { - // Check for tensor.empty source. - auto emptyOp = unPackOp.getSource().getDefiningOp(); - if (!emptyOp) - return failure(); - - // Replace the unpack directly with its destination. - rewriter.replaceOp(unPackOp, unPackOp.getDest()); - - return success(); - } -}; - // Fold concat operation where all the operands are empty. struct FoldConcatsOfEmpty : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -176,7 +133,6 @@ void mlir::tensor::populateFoldTensorEmptyPatterns(RewritePatternSet &patterns, FoldEmptyTensorWithReshapeOp, FoldEmptyTensorWithReshapeOp>( patterns.getContext(), /*benefit=*/1, foldSingleUseOnly); - patterns.add(patterns.getContext(), - /*benefit=*/1); + patterns.add(patterns.getContext(), + /*benefit=*/1); } diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir index 01ca4374da046..4ba4b09f52163 100644 --- a/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir +++ b/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir @@ -38,64 +38,64 @@ func.func @block_matmul_transpose_b( // MMT4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> // MMT4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> // MMT4D-LABEL: func @block_matmul -// MMT4D-COUNT-3: tensor.pack +// MMT4D-COUNT-3: linalg.pack // MMT4D: linalg.generic // MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MMT4D-COUNT-1: tensor.unpack +// MMT4D-COUNT-1: linalg.unpack // MMT4D-LABEL: func @block_matmul_transpose_a -// MMT4D-COUNT-3: tensor.pack +// MMT4D-COUNT-3: linalg.pack // MMT4D: linalg.generic // MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MMT4D-COUNT-1: tensor.unpack +// MMT4D-COUNT-1: linalg.unpack // MMT4D-LABEL: func @block_matmul_transpose_b -// MMT4D-COUNT-3: tensor.pack +// MMT4D-COUNT-3: linalg.pack // MMT4D: linalg.generic // MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MMT4D-COUNT-1: tensor.unpack +// MMT4D-COUNT-1: linalg.unpack // MM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> // MM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)> // MM4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> // MM4D-LABEL: func @block_matmul -// MM4D-COUNT-3: tensor.pack +// MM4D-COUNT-3: linalg.pack // MM4D: linalg.generic // MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MM4D-COUNT-1: tensor.unpack +// MM4D-COUNT-1: linalg.unpack // MM4D-LABEL: func @block_matmul_transpose_a -// MM4D-COUNT-3: tensor.pack +// MM4D-COUNT-3: linalg.pack // MM4D: linalg.generic // MM4D-SAME: indexing_maps = [#[[$MAP]], 
#[[$MAP1]], #[[$MAP2]]] // MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MM4D-COUNT-1: tensor.unpack +// MM4D-COUNT-1: linalg.unpack // MM4D-LABEL: func @block_matmul_transpose_b -// MM4D-COUNT-3: tensor.pack +// MM4D-COUNT-3: linalg.pack // MM4D: linalg.generic // MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MM4D-COUNT-1: tensor.unpack +// MM4D-COUNT-1: linalg.unpack // MTM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d5, d3)> // MTM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)> // MTM4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> // MTM4D-LABEL: func @block_matmul -// MTM4D-COUNT-3: tensor.pack +// MTM4D-COUNT-3: linalg.pack // MTM4D: linalg.generic // MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MTM4D-COUNT-1: tensor.unpack +// MTM4D-COUNT-1: linalg.unpack // MTM4D-LABEL: func @block_matmul_transpose_a -// MTM4D-COUNT-3: tensor.pack +// MTM4D-COUNT-3: linalg.pack // MTM4D: linalg.generic // MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MTM4D-COUNT-1: tensor.unpack +// MTM4D-COUNT-1: linalg.unpack // MTM4D-LABEL: func @block_matmul_transpose_b -// MTM4D-COUNT-3: tensor.pack +// MTM4D-COUNT-3: linalg.pack // MTM4D: linalg.generic // MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MTM4D-COUNT-1: tensor.unpack +// MTM4D-COUNT-1: linalg.unpack diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir index 9e396ba08d246..e667879ceea0e 100644 --- a/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir +++ b/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir @@ -21,17 +21,17 @@ func.func @block_matmul_padding( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32> // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<123x125xf32> -> tensor<4x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<125x124xf32> -> tensor<8x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : 
tensor<123x124xf32> -> tensor<4x8x32x16xf32> @@ -39,17 +39,17 @@ func.func @block_matmul_padding( // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<123x124xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<123x124xf32> // NOPAD-LABEL: func @block_matmul_padding( // NOPAD-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32> -// NOPAD-NOT: tensor.pack +// NOPAD-NOT: linalg.pack // NOPAD: linalg.matmul ins(%[[A]], %[[B]] : tensor<123x125xf32>, tensor<125x124xf32>) // NOPAD-SAME: outs(%[[C]] : tensor<123x124xf32>) -> tensor<123x124xf32> -// NOPAD-NOT: tensor.unpack +// NOPAD-NOT: linalg.unpack // PAD-MULT-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> // PAD-MULT-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> @@ -58,17 +58,17 @@ func.func @block_matmul_padding( // PAD-MULT-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32> // PAD-MULT-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32 // PAD-MULT: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<1x1x256x384xf32> -// PAD-MULT: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// PAD-MULT: %[[A_PACKED:.+]] = linalg.pack %[[A]] // PAD-MULT-SAME: padding_value(%[[ZERO]] : f32) // PAD-MULT-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [256, 384] // PAD-MULT-SAME: into %[[PACK_DST_0]] : tensor<123x125xf32> -> tensor<1x1x256x384xf32> // PAD-MULT: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<1x1x512x384xf32> -// PAD-MULT: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// PAD-MULT: %[[B_PACKED:.+]] = linalg.pack %[[B]] // PAD-MULT-SAME: padding_value(%[[ZERO]] : f32) // PAD-MULT-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [512, 384] // PAD-MULT-SAME: into %[[PACK_DST_1]] : tensor<125x124xf32> -> tensor<1x1x512x384xf32> // PAD-MULT: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<1x1x256x512xf32> -// PAD-MULT: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// PAD-MULT: %[[C_PACKED:.+]] = linalg.pack %[[C]] // PAD-MULT-SAME: padding_value(%[[ZERO]] : f32) // PAD-MULT-SAME: inner_dims_pos = [0, 1] inner_tiles = [256, 512] // PAD-MULT-SAME: into %[[PACK_DST_2]] : tensor<123x124xf32> -> tensor<1x1x256x512xf32> @@ -76,7 +76,7 @@ func.func @block_matmul_padding( // PAD-MULT-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // PAD-MULT-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // PAD-MULT-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<1x1x256x384xf32>, tensor<1x1x512x384xf32>) outs(%[[C_PACKED]] : tensor<1x1x256x512xf32>) -// PAD-MULT: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// PAD-MULT: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // PAD-MULT-SAME: inner_dims_pos = [0, 1] inner_tiles = [256, 512] // PAD-MULT-SAME: into %[[C]] : tensor<1x1x256x512xf32> -> tensor<123x124xf32> // PAD-MULT: return %[[RES_UNPACKED]] : tensor<123x124xf32> 
diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul.mlir index 8a82608177692..aa860dbd581a9 100644 --- a/mlir/test/Dialect/Linalg/block-pack-matmul.mlir +++ b/mlir/test/Dialect/Linalg/block-pack-matmul.mlir @@ -14,22 +14,22 @@ func.func @block_matmul( // CHECK-LABEL: func @block_matmul( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x128xf32>, %[[B:[0-9a-z]+]]: tensor<128x128xf32>, %[[C:[0-9a-z]+]]: tensor<128x128xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x128xf32> -> tensor<4x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x128xf32> -> tensor<8x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<128x128xf32> -> tensor<4x8x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -60,7 +60,7 @@ func.func @block_matmul_dynamic( // CHECK-DAG: %[[A_OUTER_TILE_M:.+]] = affine.apply #[[$MAP_M]]()[%[[A_M]]] // CHECK-DAG: %[[A_OUTER_TILE_K:.+]] = affine.apply #[[$MAP_K]]()[%[[A_K]]] // CHECK: %[[PACK_DST_0:.+]] = tensor.empty(%[[A_OUTER_TILE_M]], %[[A_OUTER_TILE_K]]) : tensor -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor -> tensor @@ -69,7 +69,7 @@ func.func @block_matmul_dynamic( // CHECK-DAG: %[[B_OUTER_TILE_K:.+]] = affine.apply #[[$MAP_K]]()[%[[B_K]]] // CHECK-DAG: %[[B_OUTER_TILE_N:.+]] = affine.apply #[[$MAP_N]]()[%[[B_N]]] // CHECK: %[[PACK_DST_1:.+]] = tensor.empty(%[[B_OUTER_TILE_N]], %[[B_OUTER_TILE_K]]) : tensor -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor -> tensor @@ -78,7 +78,7 @@ func.func @block_matmul_dynamic( // CHECK-DAG: %[[C_OUTER_TILE_M:.+]] = affine.apply #[[$MAP_M]]()[%[[C_M]]] // CHECK-DAG: %[[C_OUTER_TILE_N:.+]] = affine.apply #[[$MAP_N]]()[%[[C_N]]] // CHECK: 
%[[PACK_DST_2:.+]] = tensor.empty(%[[C_OUTER_TILE_M]], %[[C_OUTER_TILE_N]]) : tensor -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor -> tensor @@ -86,7 +86,7 @@ func.func @block_matmul_dynamic( // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor, tensor) outs(%[[C_PACKED]] : tensor) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor -> tensor // CHECK: return %[[RES_UNPACKED]] : tensor @@ -107,7 +107,7 @@ func.func @block_matmul_with_constant( // CHECK-DAG: %[[RES_DST:.+]] = arith.constant dense<0.000000e+00> : tensor<128x128xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: ins({{.*}} : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[CST_ACC_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[RES_DST]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -130,7 +130,7 @@ func.func @block_matmul_with_producer( // CHECK: %[[ACC_PACKED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[FILL_DST_PACKED]] : tensor<4x8x32x16xf32>) -> tensor<4x8x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: ins({{.*}} : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[ACC_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -152,7 +152,7 @@ func.func @block_matmul_with_consumer( // CHECK-DAG: %[[RES_DST:.+]] = tensor.empty() : tensor<128x128xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: outs({{.*}} : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: %[[ADD_RES:.+]] = linalg.add @@ -175,22 +175,22 @@ func.func @block_batch_matmul( // CHECK-LABEL: func @block_batch_matmul( // CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// 
CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32> @@ -211,22 +211,22 @@ func.func @block_matmul_transpose_a( // CHECK-LABEL: func @block_matmul_transpose_a( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -247,22 +247,22 @@ func.func @block_batch_matmul_transpose_a( // CHECK-LABEL: func @block_batch_matmul_transpose_a( // CHECK-SAME: %[[A:.+]]: tensor<512x128x64xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// 
CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x128x64xf32> -> tensor<512x2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32> @@ -283,22 +283,22 @@ func.func @block_matmul_transpose_b( // CHECK-LABEL: func @block_matmul_transpose_b( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -319,22 
+319,22 @@ func.func @block_batch_matmul_transpose_b( // CHECK-LABEL: func @block_batch_matmul_transpose_b( // CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x64x128xf32>, %[[C:.+]]: tensor<512x64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x64x128xf32> -> tensor<512x4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32> @@ -365,22 +365,22 @@ func.func @block_generic_matmul( // CHECK-LABEL: func @block_generic_matmul( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x128xf32>, %[[B:[0-9a-z]+]]: tensor<128x128xf32>, %[[C:[0-9a-z]+]]: tensor<128x128xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x128xf32> -> tensor<4x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x128xf32> -> tensor<8x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<128x128xf32> -> tensor<4x8x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : 
tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -411,22 +411,22 @@ func.func @block_generic_matmul_transpose_a( // CHECK-LABEL: func @block_generic_matmul_transpose_a( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -457,22 +457,22 @@ func.func @block_generic_matmul_transpose_b( // CHECK-LABEL: func @block_generic_matmul_transpose_b( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = 
linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -498,10 +498,10 @@ func.func @non_contraction_generic( // CHECK-LABEL: func @non_contraction_generic( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32> // CHECK-DAG: %[[C0:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK-NOT: tensor.pack +// CHECK-NOT: linalg.pack // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: outs(%[[A]] : tensor<64x128xf32>) -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // CHECK: return %[[GENERIC]] : tensor<64x128xf32> diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index cd439cd23ecd0..db4f6181f517c 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -357,7 +357,7 @@ func.func @fill_pack() -> tensor<24x32x16x16xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<24x32x16x16xf32> %1 = linalg.fill ins(%cst : f32) outs(%dest : tensor<384x512xf32>) -> tensor<384x512xf32> - %pack = tensor.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32> + %pack = linalg.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32> return %pack : tensor<24x32x16x16xf32> } // CHECK-LABEL: func.func @fill_pack @@ -374,7 +374,7 @@ func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{ %extracted_slice_15 = tensor.extract_slice %9[0, 0, 0, 0] [1, 1, 16, 64] [1, 1, 1, 1] : tensor<1x1x16x64xi32> to tensor<1x1x16x64xi32> %16 = linalg.fill ins(%c0_i32 : i32) outs(%extracted_slice_15 : tensor<1x1x16x64xi32>) -> tensor<1x1x16x64xi32> %0 = bufferization.to_tensor %alloc restrict writable : memref<1x1x8x4x4x8xi32> to tensor<1x1x8x4x4x8xi32> - %pack_18 = tensor.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32> + %pack_18 = linalg.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32> return %pack_18 : tensor<1x1x8x4x4x8xi32> } @@ -397,7 +397,7 @@ func.func @dynamic_fill_pack(%arg0: tensor<?x?xf32>) -> tensor<?x?x16x16xf32> { %1 = affine.apply #map()[%dim] %2 = affine.apply #map()[%dim_0] %3 = tensor.empty(%1, %2) : tensor<?x?x16x16xf32> - %pack = tensor.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32> + %pack = linalg.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32> return %pack : tensor<?x?x16x16xf32> } // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> @@ -1249,3 +1249,499 @@ func.func @recursive_effect(%arg : tensor<1xf32>) { // CHECK-LABEL: @recursive_effect // CHECK: linalg.map + 
+//===----------------------------------------------------------------------===// +// linalg.pack +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func @fold_pack_constant_splat +// CHECK-NOT: linalg.pack +// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> +func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { + %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32> + %0 = linalg.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] + inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32> + return %0 : tensor<8x16x8x32xf32> +} + +// ----- + +// CHECK-LABEL: func @fold_padding_value_pack_constant_splat +// CHECK-NOT: linalg.pack +// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> +func.func @fold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { + %pad = arith.constant 1.000000e-01 : f32 + %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> + %0 = linalg.pack %cst + padding_value(%pad : f32) + outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] + inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> + return %0 : tensor<8x16x8x32xf32> +} + + +// ----- + +// CHECK-LABEL: func @nofold_padding_value_pack_constant_splat +// CHECK: arith.constant dense<1.000000e-01> : tensor<63x127xf32> +// CHECK: linalg.pack +func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { + %pad = arith.constant 0.0 : f32 + %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> + %0 = linalg.pack %cst + padding_value(%pad : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [0, 1] + inner_tiles = [8, 32] + into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> + return %0 : tensor<8x16x8x32xf32> +} + +// ----- + +func.func @fold_padding_value_pack(%arg0: tensor<1200x500000xf32>) -> tensor<31250x1200x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<31250x1200x16x1xf32> + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [16, 1] + into %0 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> + return %pack : tensor<31250x1200x16x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack +// CHECK-NOT: padding_value + +// ----- + +func.func @infer_src_shape_pack(%src: tensor, %dest: tensor<10x20x30x40x16xf32>) -> tensor<10x20x30x40x16xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %src + padding_value(%cst : f32) + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor -> tensor<10x20x30x40x16xf32> + return %pack : tensor<10x20x30x40x16xf32> +} +// CHECK-LABEL: func.func @infer_src_shape_pack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor to tensor<40x20x?x30xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[CAST_SRC]] {{.+}} into %[[DEST]] +// CHECK: return %[[PACK]] + +// ----- + +func.func @infer_dest_shape_pack(%src: tensor<30x20x?x10xf32>, %dest: tensor) -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %src + padding_value(%cst : f32) + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor<30x20x?x10xf32> -> tensor + return %pack : tensor +} +// CHECK-LABEL: func.func @infer_dest_shape_pack 
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] {{.+}} into %[[CAST_DEST]] +// CHECK: %[[CAST_PACK:.+]] = tensor.cast %[[PACK]] : tensor<?x20x10x30x16xf32> to tensor<?x?x?x?x16xf32> +// CHECK: return %[[CAST_PACK]] + +// ----- + +func.func @no_infer_pack_shape(%arg0: tensor<?x32x100xf32>, %arg1: index) -> tensor<32x7x?x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty(%arg1) : tensor<32x7x?x16x1xf32> + %pack = linalg.pack %arg0 padding_value(%cst : f32) outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<?x32x100xf32> -> tensor<32x7x?x16x1xf32> + return %pack : tensor<32x7x?x16x1xf32> +} +// CHECK-LABEL: func.func @no_infer_pack_shape +// CHECK-NOT: tensor.cast + +// ----- + +func.func @fold_padding_value_pack_negative1(%arg0: tensor<1200x499999xf32>) -> tensor<31250x1200x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<31250x1200x16x1xf32> + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [16, 1] + into %0 : tensor<1200x499999xf32> -> tensor<31250x1200x16x1xf32> + return %pack : tensor<31250x1200x16x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack_negative1 +// CHECK: linalg.pack +// CHECK-SAME: padding_value + +// ----- + +func.func @fold_padding_value_pack_negative2(%arg0: tensor<1200x?xf32>, %arg1: tensor<?x1200x16x1xf32>) -> tensor<?x1200x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [16, 1] + into %arg1 : tensor<1200x?xf32> -> tensor<?x1200x16x1xf32> + return %pack : tensor<?x1200x16x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack_negative2 +// CHECK: linalg.pack +// CHECK-SAME: padding_value + +// ----- + +func.func @fold_padding_value_pack_negative3(%arg0: tensor<1200x500000xf32>, %arg1: tensor<?x1200x?x1xf32>, %tile : index) -> tensor<?x1200x?x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [%tile, 1] + into %arg1 : tensor<1200x500000xf32> -> tensor<?x1200x?x1xf32> + return %pack : tensor<?x1200x?x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack_negative3 +// CHECK: linalg.pack +// CHECK-SAME: padding_value + +// ----- + +//===----------------------------------------------------------------------===// +// linalg.unpack +//===----------------------------------------------------------------------===// + + +// CHECK-LABEL: func @fold_unpack_constant_splat +// CHECK-NOT: linalg.unpack +// CHECK: arith.constant dense<1.000000e-01> : tensor<128x256xf32> +func.func @fold_unpack_constant_splat(%dest : tensor<128x256xf32>) -> tensor<128x256xf32> { + %cst = arith.constant dense<1.000000e-01> : tensor<16x8x8x32xf32> + %0 = linalg.unpack %cst inner_dims_pos = [0, 1] + inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32> + return %0 : tensor<128x256xf32> +} + +// ----- + +func.func @infer_dest_shape_unpack(%src: tensor<10x20x30x40x16xf32>, %dest: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> { + %unpack = linalg.unpack %src + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor<10x20x30x40x16xf32> -> tensor<?x?x?x?xf32> + return %unpack : tensor<?x?x?x?xf32> +} +// CHECK-LABEL: func.func @infer_dest_shape_unpack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?xf32> to 
tensor<40x20x?x30xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SRC]] {{.+}} into %[[CAST_DEST]] +// CHECK: %[[CAST_UNPACK:.+]] = tensor.cast %[[UNPACK]] : tensor<40x20x?x30xf32> to tensor<?x?x?x?xf32> +// CHECK: return %[[CAST_UNPACK]] + +// ----- + +func.func @infer_src_shape_unpack(%src: tensor<?x?x?x?x16xf32>, %dest: tensor<30x20x?x10xf32>) -> tensor<30x20x?x10xf32> { + %unpack = linalg.unpack %src + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor<?x?x?x?x16xf32> -> tensor<30x20x?x10xf32> + return %unpack : tensor<30x20x?x10xf32> +} +// CHECK-LABEL: func.func @infer_src_shape_unpack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[CAST_SRC]] +// CHECK: return %[[UNPACK]] + +// ----- + +func.func @no_infer_unpack_shape(%arg1: tensor<32x7x?x16x1xf32>, %arg2: index) -> tensor<?x32x100xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty(%arg2) : tensor<?x32x100xf32> + %unpack = linalg.unpack %arg1 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<32x7x?x16x1xf32> -> tensor<?x32x100xf32> + return %unpack : tensor<?x32x100xf32> +} +// CHECK-LABEL: func.func @no_infer_unpack_shape +// CHECK-NOT: tensor.cast + +// ----- + +//===----------------------------------------------------------------------===// +// linalg.pack + linalg.unpack +//===----------------------------------------------------------------------===// + +// Chain: NC -> NCnc -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) +// CHECK: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> + return %unpacked : tensor<128x128xf32> +} + +// ----- + +// Chain: NC -> NCcn -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) +// CHECK-NOT: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %t inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> + return %unpacked : tensor<128x128xf32> +} + +// ----- + +// Chain: NC -> CNcn -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) +// CHECK-NOT: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %t outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
+ return %unpacked : tensor<128x128xf32> +} + +// ----- + +// Chain: NC -> NCnc -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>, +// CHECK: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>, %tile1: index, %tile2: index) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + return %unpacked : tensor<128x128xf32> +} + +// ----- + +// CHECK: func.func @unpack_pack_with_padding_no_canonicalization( +// CHECK: linalg.pack +// CHECK: linalg.unpack +func.func @unpack_pack_with_padding_no_canonicalization(%t: tensor<256x512xbf16>) -> tensor<224x512xbf16> { + %tensor_empty = tensor.empty() : tensor<4x16x64x32xbf16> + %tensor_empty1 = tensor.empty() : tensor<224x512xbf16> + %packed = linalg.pack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty : tensor<256x512xbf16> -> tensor<4x16x64x32xbf16> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16> + return %unpacked : tensor<224x512xbf16> +} + +// ----- + +// Chain NCnc -> NC -> NC -> NCnc +// CHECK: func.func @pack_unpack( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, +// CHECK: return %[[T]] : tensor<16x16x?x?xf32> +func.func @pack_unpack(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + return %packed : tensor<16x16x?x?xf32> +} + +// ----- + +// Chain NCnc -> NC -> NC -> NCnc +// CHECK: func.func @pack_unpack( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x8x8xf32> +// CHECK: return %[[T]] : tensor<16x16x8x8xf32> +func.func @pack_unpack(%t: tensor<16x16x8x8xf32>) -> tensor<16x16x8x8xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<16x16x8x8xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + return %packed : tensor<16x16x8x8xf32> +} + +// ----- + +// CHECK: func.func @pack_unpack_same_tiles( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>, +// CHECK: return %[[T]] : tensor<?x?x?x?xf32> +func.func @pack_unpack_same_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, + %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> { + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32> + %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, 
%dim6) : tensor<?x?x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32> + return %packed : tensor<?x?x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_unpack_different_tiles( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>, +// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32> +func.func @pack_unpack_different_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, + %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> { + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32> + %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile2, %tile1] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32> + return %packed : tensor<?x?x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_unpack_dynamic_with_padding( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>, +// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32> +func.func @pack_unpack_dynamic_with_padding(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, + %tile1: index, %tile2: index, %pad: f32) -> tensor<?x?x?x?xf32> { + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32> + %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32> + %packed = linalg.pack %unpacked padding_value(%pad: f32) inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32> + return %packed : tensor<?x?x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_outer_dims_unpack_no_outer_dims( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, +// CHECK: return %[[T]] : tensor<16x16x?x?xf32> +func.func @pack_outer_dims_unpack_no_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %unpacked outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + return %packed : tensor<16x16x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_no_outer_dims_unpack_outer_dims( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, +// CHECK: return %[[T]] : tensor<16x16x?x?xf32> +func.func @pack_no_outer_dims_unpack_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + return %packed : tensor<16x16x?x?xf32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// tensor.cast + linalg.pack +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func.func @fold_cast_pack_dynamic_tile_size 
+// CHECK-SAME: %[[DEST:.*]]: tensor<1x1x8x1xi32>, +// CHECK-SAME: %[[SRC:.*]]: tensor<7x?xi32>, +// CHECK-SAME: %[[PAD:.*]]: i32) -> tensor<1x1x8x1xi32> { +// CHECK: %[[PACK:.*]] = linalg.pack %[[SRC]] padding_value(%[[PAD]] : i32) +// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] +// CHECK-SAME: test_attr +// CHECK-SAME: : tensor<7x?xi32> -> tensor<1x1x8x1xi32> +// CHECK: return %[[PACK]] : tensor<1x1x8x1xi32> +func.func @fold_cast_pack_dynamic_tile_size( + %dest: tensor<1x1x8x1xi32>, + %src: tensor<7x?xi32>, + %pad: i32) -> tensor<1x1x8x1xi32> { + + %cast = tensor.cast %dest : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> + %c8 = arith.constant 8 : index + %pack = linalg.pack %src padding_value(%pad : i32) + inner_dims_pos = [0, 1] + inner_tiles = [%c8, 1] + into %cast {test_attr} : tensor<7x?xi32> -> tensor<1x1x?x1xi32> + %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32> + return %res : tensor<1x1x8x1xi32> +} + +// ----- + +func.func @infer_and_fold_pack_unpack_same_tiles(%t: tensor<10x20x4x4xf32>) -> tensor<10x20x4x4xf32> { + %dim1 = arith.constant 40 : index + %dim2 = arith.constant 80 : index + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty : tensor<10x20x4x4xf32> -> tensor<?x?xf32> + %cast = tensor.cast %unpacked : tensor<?x?xf32> to tensor<40x80xf32> + %tensor_empty1 = tensor.empty() : tensor<10x20x4x4xf32> + %packed = linalg.pack %cast inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty1 : tensor<40x80xf32> -> tensor<10x20x4x4xf32> + return %packed : tensor<10x20x4x4xf32> +} +// CHECK-LABEL: func.func @infer_and_fold_pack_unpack_same_tiles +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK: return %[[SRC]] + +// ----- + +// CHECK-LABEL: func.func @pack_dont_drop_attributes( +// CHECK: linalg.pack {{.*}} {test_attr} +func.func @pack_dont_drop_attributes(%arg0: tensor<128x?x100xf16>, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> { + %c32_i64 = arith.constant 32 : i64 + %cst = arith.constant 0.000000e+00 : f16 + %pack = linalg.pack %arg0 padding_value(%cst : f16) outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %arg1 {test_attr} : tensor<128x?x100xf16> -> tensor<128x?x100x16x1xf16> + return %pack : tensor<128x?x100x16x1xf16> +} +// ----- + +//===----------------------------------------------------------------------===// +// linalg.fill + linalg.unpack +//===----------------------------------------------------------------------===// +// Fold DstStyleOp -> linalg.unpack operations. 
+func.func @fold_dst_style_ops_into_unpack(%arg0 : tensor<?x?x16x64xf32>, %init : tensor<?x?xf32>) -> tensor<?x?xf32> { + %cst = arith.constant 0.0 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32> + %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %fill : tensor<?x?x16x64xf32> -> tensor<?x?xf32> + return %unpack : tensor<?x?xf32> +} +// CHECK-LABEL: func @fold_dst_style_ops_into_unpack +// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x16x64xf32> +// CHECK-SAME: %[[INIT:.+]]: tensor<?x?xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] +// CHECK-SAME: into %[[INIT]] +// CHECK: return %[[UNPACK]] + +// ----- + +//===----------------------------------------------------------------------===// +// tensor.cast + linalg.unpack +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func.func @fold_cast_unpack_dynamic_tile_size( +// CHECK-SAME: %[[SRC:.*]]: tensor<1x1x8x1xi32>, +// CHECK-SAME: %[[DEST:.*]]: tensor<7x?xi32>) -> tensor<7x?xi32> { +// CHECK: %[[RES:.*]] = linalg.unpack %[[SRC]] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] {test_attr} : tensor<1x1x8x1xi32> -> tensor<7x?xi32> +// CHECK: return %[[RES]] : tensor<7x?xi32> +func.func @fold_cast_unpack_dynamic_tile_size( + %src: tensor<1x1x8x1xi32>, + %res: tensor<7x?xi32>) -> tensor<7x?xi32> { + + %cast = tensor.cast %src : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> + %c8 = arith.constant 8 : index + %unpack = linalg.unpack %cast + inner_dims_pos = [0, 1] + inner_tiles = [%c8, 1] + into %res {test_attr} : tensor<1x1x?x1xi32> -> tensor<7x?xi32> + return %unpack : tensor<7x?xi32> +} diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index cb8064411bbae..a48807b46b766 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -15,7 +15,7 @@ func.func @dynamic_elem_pack(%arg0: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>) %4 = arith.addf %arg3, %arg3 : f32 linalg.yield %4 : f32 } -> tensor<?x?xf32> - %4 = tensor.pack %3 + %4 = linalg.pack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32> @@ -34,7 +34,7 @@ func.func @dynamic_elem_pack(%arg0: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>) // CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] // CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]], %[[OUTER_D1]]) : tensor<?x?x8x2xf32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 2] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -56,7 +56,7 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 } -> tensor<128x256xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> @@ -67,7 +67,7 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -89,7 +89,7 @@ func.func 
@elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 } -> tensor<128x256xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] @@ -101,7 +101,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[ARG0_EMPTY]] : tensor<128x256xi32> -> tensor<16x4x32x16xi32> // CHECK: %[[ELEM:.+]] = linalg.generic @@ -123,7 +123,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>, %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 } -> tensor<128x256xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] @@ -135,7 +135,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>, // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -163,7 +163,7 @@ func.func @dynamic_broadcast_pack(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %d %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 } -> tensor<?x?xf32> - %4 = tensor.pack %3 + %4 = linalg.pack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32> @@ -182,13 +182,13 @@ func.func @dynamic_broadcast_pack(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %d // CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]]) : tensor<?x8xf32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C0]] // CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] // CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty(%[[OUTER_D1]]) : tensor<?x2xf32> -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[PACK_ARG1:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] // CHECK-SAME: into %[[ARG1_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -212,7 +212,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: tensor<64xf32>, %des ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<1x56x57x64xf32> - %2 = tensor.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32> + %2 = linalg.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32> return %2 : tensor<1x2x56x57x32xf32> } // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)> @@ -221,7 +221,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: 
tensor<64xf32>, %des // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<2x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -247,7 +247,7 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x %1 = arith.addi %0, %b2 : i32 linalg.yield %1 : i32 } -> tensor<100x200x128x256xi32> - %4 = tensor.pack %transpose + %4 = linalg.pack %transpose inner_dims_pos = [3, 2] inner_tiles = [16, 32] into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> @@ -263,11 +263,11 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -293,7 +293,7 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: %1 = arith.addi %0, %b2 : i32 linalg.yield %1 : i32 } -> tensor<100x200x128x256xi32> - %4 = tensor.pack %transpose + %4 = linalg.pack %transpose inner_dims_pos = [3, 2] inner_tiles = [16, 32] into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> @@ -309,11 +309,11 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<1x4x1x1x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -339,7 +339,7 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a %1 = arith.addi %0, %b2 : i32 linalg.yield %1 : i32 } -> tensor<100x200x128x256xi32> - %4 = tensor.pack %transpose + %4 = linalg.pack %transpose outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] inner_tiles = [16, 32] @@ -356,11 +356,11 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<200x4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : 
tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -380,7 +380,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten linalg.yield %4 : i32 } -> tensor<128x256xi32> %empty = tensor.empty() : tensor<16x4x32x16xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] @@ -393,11 +393,11 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[PACKED_ARG1:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[ARG1_EMPTY]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -411,7 +411,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%1 : tensor<12x56x56x64xf32>) { ^bb0(%out: f32): %3 = arith.addf %out, %out : f32 @@ -424,17 +424,17 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56 // CHECK-LABEL: func.func @unpack_on_output // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] // CHECK: %[[ARG0_EMPTY_PACK:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY_PACK]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]]] // CHECK-SAME: outs(%[[PACKED_ARG0]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[UNPACKED_ARG0]] @@ -444,7 +444,7 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56 func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf32>) -> 
tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %out : f32 @@ -458,22 +458,22 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1_PACK_EMPTY]] // CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[ARG0_PACK:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_PACK_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] // CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1]] @@ -483,7 +483,7 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56 func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf16>) -> tensor<12x56x56x64xf16> { %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf16>) { ^bb0(%in: f32, %out: f16): %3 = arith.truncf %in : f32 to f16 @@ -497,22 +497,22 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // CHECK: 
%[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1_PACK_EMPTY]] // CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[ARG0_PACK:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_PACK_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] // CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1]] @@ -523,7 +523,7 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> { %init = tensor.empty() : tensor<12x56x56x64xf32> %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %in : f32 @@ -537,19 +537,19 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[FINAL_RES:.+]] = tensor.empty() : tensor<12x56x56x64xf32> // CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> // CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_PACK_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] // CHECK-SAME: outs(%[[DEST]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[FINAL_RES]] @@ -558,7 +558,7 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5 func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] 
inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> %padded = tensor.pad %1 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 @@ -571,7 +571,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[PADDED]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x2x58x58x32xf32> -> tensor<1x58x58x64xf32> @@ -580,7 +580,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> %padded = tensor.pad %1 low[1, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 @@ -593,7 +593,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[1, 0, 1, 1, 0] high[0, 0, 1, 1, 0] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[PADDED]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<2x2x58x58x32xf32> -> tensor<2x58x58x64xf32> @@ -602,7 +602,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x66xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> %padded = tensor.pad %1 low[0, 1, 1, 1] high[0, 1, 1, 1] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 @@ -614,7 +614,7 @@ func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x5 // CHECK: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // 
CHECK-SAME: into %[[EMPTY]] : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[UNPACK]] low[0, 1, 1, 1] high[0, 1, 1, 1] @@ -628,7 +628,7 @@ func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1 tensor.yield %cst : f32 } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x2x58x58x32xf32> - %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> return %1 : tensor<1x2x58x58x32xf32> } @@ -636,7 +636,7 @@ func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1 // CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32> -// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] // CHECK: return %[[PADDED]] @@ -650,7 +650,7 @@ func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>) tensor.yield %cst : f32 } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x58x58x2x32xf32> - %1 = tensor.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32> + %1 = linalg.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32> return %1 : tensor<1x58x58x2x32xf32> } @@ -658,7 +658,7 @@ func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>) // CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x2x32xf32> -// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x56x56x2x32xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 1, 1, 0, 0] high[0, 1, 1, 0, 0] @@ -673,7 +673,7 @@ func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x tensor.yield %cst : f32 } : tensor<1x60x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x2x58x58x32xf32> - %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> return %1 : tensor<1x2x58x58x32xf32> } @@ -682,7 +682,7 @@ func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 2, 1, 1] high[0, 2, 1, 1] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x58x58x32xf32> -// CHECK: tensor.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: linalg.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] // 
CHECK-SAME: into %[[EMPTY]] : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> // ----- @@ -694,7 +694,7 @@ func.func @multi_use_pad_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> (ten tensor.yield %cst : f32 } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x2x58x58x32xf32> - %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> return %padded, %1 : tensor<1x64x58x58xf32>, tensor<1x2x58x58x32xf32> } @@ -702,10 +702,10 @@ func.func @multi_use_pad_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> (ten // CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32> -// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] // CHECK: return %[[UNPACKED]], %[[PADDED]] // ----- @@ -721,7 +721,7 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3 linalg.yield %4 : i32 } -> tensor<128x256xi32> %dest = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> @@ -735,7 +735,7 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3 // CHECK-SAME: ins(%[[ARG0]] // CHECK-SAME: outs(%[[EMPTY]] // CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> -// CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] +// CHECK-NEXT: %{{.+}} = linalg.pack %[[GEN]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ALLOC]] @@ -751,7 +751,7 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { linalg.yield %in : f32 } -> tensor<1x7x7x1024xf32> %empty_pack = tensor.empty() : tensor<1x32x7x7x32xf32> - %pack = tensor.pack %gen outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %empty_pack : tensor<1x7x7x1024xf32> -> tensor<1x32x7x7x32xf32> + %pack = linalg.pack %gen outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %empty_pack : tensor<1x7x7x1024xf32> -> tensor<1x32x7x7x32xf32> return %pack : tensor<1x32x7x7x32xf32> } @@ -772,7 +772,7 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x56x56x64xf32> { %init = tensor.empty() : tensor<12x56x56x64xf32> %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] into %0 : tensor<12x64x56x56xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] into %0 : tensor<12x64x56x56xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = 
["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %in : f32 @@ -782,13 +782,13 @@ func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x } // CHECK-LABEL: func.func @unpack_empty_inner_dims -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] // ----- @@ -805,7 +805,7 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>, linalg.yield %4 : i32 } -> tensor<128x256xi32> %dest = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> @@ -817,11 +817,11 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>, // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[PACK_ARG1:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG1_EMPTY]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x32x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[RED:.+]] = linalg.generic @@ -851,7 +851,7 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a linalg.yield %2 : i32 } -> tensor<100x128x256xi32> %init_pack = tensor.empty() : tensor<4x16x100x16x32xi32> - %4 = tensor.pack %reduction + %4 = linalg.pack %reduction outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] @@ -869,15 +869,15 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] // CHECK: %[[ARG3_EMPTY:.+]] = tensor.empty() : tensor<4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG3:.+]] = tensor.pack %[[ARG3]] +// CHECK: %[[PACKED_ARG3:.+]] = linalg.pack %[[ARG3]] // CHECK-SAME: outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG3_EMPTY]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x200x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 3, 2, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: 
%[[RES:.+]] = linalg.generic @@ -894,7 +894,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 %filter: tensor<2x2xi32>) -> tensor<16x540x960xi32>{ %init = tensor.empty() : tensor<16x540x960xi32> %empty = tensor.empty() : tensor<1x16x1080x1920xi32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [16] into %empty : tensor<1x1x1080x1920x16xi32> -> tensor<1x16x1080x1920xi32> @@ -916,7 +916,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 // CHECK: %[[FINAL_RES:.+]] = tensor.empty() : tensor<16x540x960xi32> // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x540x960x16xi32> // CHECK: %[[PACK_EMPTY:.+]] = tensor.empty() : tensor<1x1x1080x1920x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [16] // CHECK-SAME: into %[[PACK_EMPTY]] // CHECK: %[[POOL:.+]] = linalg.generic @@ -924,7 +924,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]], %[[ARG1]] // CHECK-SAME: outs(%[[INIT]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[POOL]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[POOL]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] // CHECK-SAME: into %[[FINAL_RES]] // CHECK: return %[[UNPACK]] : tensor<16x540x960xi32> @@ -934,7 +934,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) -> tensor { %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor %2 = tensor.empty(%dim) : tensor - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor func.return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_collapse @@ -943,7 +943,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor // CHECK: return %[[COLLAPSED]] : tensor @@ -952,7 +952,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor, %dim : index) -> tensor { %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor %2 = tensor.empty(%dim) : tensor - %pack = tensor.pack %collapsed inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor + %pack = linalg.pack %collapsed inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor func.return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm @@ -961,7 +961,7 @@ func.func 
@bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor // CHECK: return %[[COLLAPSED]] : tensor @@ -970,13 +970,13 @@ func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor) -> tensor<4x32x3072x8x1xf32> { %collapsed = tensor.collapse_shape %1 [[0], [1, 2], [3]] : tensor<4x192x16x256xf32> into tensor<4x3072x256xf32> %2 = tensor.empty() : tensor<4x32x3072x8x1xf32> - %pack = tensor.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32> + %pack = linalg.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32> func.return %pack : tensor<4x32x3072x8x1xf32> } // CHECK-LABEL: func.func @bubble_up_permuted_pack_through_collapse // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x32x192x16x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %pack {{\[}}[0], [1], [2, 3], [4], [5]] : tensor<4x32x192x16x8x1xf32> into tensor<4x32x3072x8x1xf32> // CHECK: return %[[COLLAPSED]] : tensor<4x32x3072x8x1xf32> @@ -985,13 +985,13 @@ func.func @bubble_up_permuted_pack_through_collapse(%1: tensor<4x192x16x256xf32> func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> tensor<8x4x8x1xf32> { %collapsed = tensor.collapse_shape %1 [[0, 1, 2], [3]] : tensor<1x64x1x4xf32> into tensor<64x4xf32> %2 = tensor.empty() : tensor<8x4x8x1xf32> - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32> + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32> func.return %pack : tensor<8x4x8x1xf32> } // CHECK-LABEL: func.func @bubble_up_pack_through_unit_collapse // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x8x1x4x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1, 2], [3], [4], [5]] : tensor<1x8x1x4x8x1xf32> into tensor<8x4x8x1xf32> // CHECK: return %[[COLLAPSED]] : tensor<8x4x8x1xf32> @@ -1000,7 +1000,7 @@ func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> ten func.func 
@bubble_up_pack_through_collapse_on_outer_dims(%1: tensor, %dim : index) -> tensor { %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor %2 = tensor.empty(%dim) : tensor - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [4] into %2 : tensor -> tensor + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [4] into %2 : tensor -> tensor func.return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_collapse_on_outer_dims @@ -1009,7 +1009,7 @@ func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor, // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [4] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [4] into %[[EMPTY]] : tensor -> tensor // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3]] : tensor into tensor // CHECK: return %[[COLLAPSED]] : tensor @@ -1018,13 +1018,13 @@ func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor, func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4xf32>) -> tensor<384x32x8x8xf32> { %collapsed = tensor.collapse_shape %1 [[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32> %2 = tensor.empty() : tensor<384x32x8x8xf32> - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32> + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32> func.return %pack : tensor<384x32x8x8xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_through_non_divisible_collapse // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[COLLAPSED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[COLLAPSED]] // CHECK: return %[[PACK]] : tensor<384x32x8x8xf32> // ----- @@ -1032,13 +1032,13 @@ func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4 func.func @bubble_up_pack_outer_expanded_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x64x4xf32> { %empty = tensor.empty() : tensor<4x2x64x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [1] inner_tiles = [4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x64x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [1] inner_tiles = [4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x64x4xf32> return %pack : tensor<4x2x64x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_outer_expanded_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x64x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<8x64x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3]] // CHECK-SAME: output_shape [4, 2, 
64, 4] : tensor<8x64x4xf32> into tensor<4x2x64x4xf32> @@ -1049,13 +1049,13 @@ func.func @bubble_up_pack_outer_expanded_through_expand(%arg0: tensor<32x64xf32> func.func @bubble_up_pack_inner_expanded_through_expand(%arg0: tensor<32x64xf32>) -> tensor<32x4x4x4xf32> { %empty = tensor.empty() : tensor<32x4x4x4xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [2] inner_tiles = [4] into %empty : tensor<32x4x16xf32> -> tensor<32x4x4x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [2] inner_tiles = [4] into %empty : tensor<32x4x16xf32> -> tensor<32x4x4x4xf32> return %pack : tensor<32x4x4x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_inner_expanded_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x16x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64xf32> -> tensor<32x16x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3]] @@ -1067,13 +1067,13 @@ func.func @bubble_up_pack_inner_expanded_through_expand(%arg0: tensor<32x64xf32> func.func @bubble_up_pack_non_expanded_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<8x2x32x16x4xf32> { %empty = tensor.empty() : tensor<8x2x32x16x4xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2], [3]] output_shape [32, 2, 32, 16] : tensor<32x64x16xf32> into tensor<32x2x32x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [4] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x32x16x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [4] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x32x16x4xf32> return %pack : tensor<8x2x32x16x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_non_expanded_dims_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x64x16x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack +// CHECK: %[[PACK:.+]] = linalg.pack // CHECK-SAME: %[[ARG0]] inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x64x16x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3], [4]] @@ -1087,7 +1087,7 @@ func.func @bubble_up_pack_through_expand_dynamic(%arg0: tensor) -> ten %dim = tensor.dim %arg0, %c0 : tensor %empty = tensor.empty(%dim) : tensor %expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [%dim, 4, 16] : tensor into tensor - %pack = tensor.pack %expanded inner_dims_pos = [2] inner_tiles = [8] into %empty : tensor -> tensor + %pack = linalg.pack %expanded inner_dims_pos = [2] inner_tiles = [8] into %empty : tensor -> tensor return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_expand_dynamic( @@ -1095,7 +1095,7 @@ func.func @bubble_up_pack_through_expand_dynamic(%arg0: tensor) -> ten // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM_INPUT:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM_INPUT]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [8] into %[[EMPTY]] // CHECK-SAME: : tensor -> tensor // CHECK: %[[DIM_PACK:.+]] = tensor.dim %[[PACK]], %[[C0]] : tensor @@ -1109,14 +1109,14 @@ 
func.func @bubble_up_pack_non_expanded_padding_through_expand(%arg0: tensor<32x6 %cst = arith.constant 3.000000e+00 : f32 %empty = tensor.empty() : tensor<4x2x8x4x8xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x60xf32> into tensor<4x8x60xf32> - %pack = tensor.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1, 2] inner_tiles = [4, 8] into %empty : tensor<4x8x60xf32> -> tensor<4x2x8x4x8xf32> + %pack = linalg.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1, 2] inner_tiles = [4, 8] into %empty : tensor<4x8x60xf32> -> tensor<4x2x8x4x8xf32> return %pack : tensor<4x2x8x4x8xf32> } // CHECK-LABEL: func.func @bubble_up_pack_non_expanded_padding_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-DAG: %[[CST:.+]] = arith.constant 3.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x8x4x8xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[CST]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[CST]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 8] into %[[EMPTY]] // CHECK-SAME: : tensor<32x60xf32> -> tensor<8x8x4x8xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] @@ -1128,13 +1128,13 @@ func.func @bubble_up_pack_non_expanded_padding_through_expand(%arg0: tensor<32x6 func.func @bubble_up_pack_outer_dims_perm_identity_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x32x4x2xf32> { %empty = tensor.empty() : tensor<4x2x32x4x2xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<4x2x32x4x2xf32> + %pack = linalg.pack %expanded outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<4x2x32x4x2xf32> return %pack : tensor<4x2x32x4x2xf32> } // CHECK-LABEL: func.func @bubble_up_pack_outer_dims_perm_identity_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32x4x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 2] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64xf32> -> tensor<8x32x4x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] @@ -1146,13 +1146,13 @@ func.func @bubble_up_pack_outer_dims_perm_identity_through_expand(%arg0: tensor< func.func @bubble_up_pack_multiple_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<8x2x4x8x4x8x2xf32> { %empty = tensor.empty() : tensor<8x2x4x8x4x8x2xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2], [3]] output_shape [32, 2, 32, 16] : tensor<32x64x16xf32> into tensor<32x2x32x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0, 2, 3] inner_tiles = [4, 8, 2] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x4x8x4x8x2xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0, 2, 3] inner_tiles = [4, 8, 2] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x4x8x4x8x2xf32> return %pack : tensor<8x2x4x8x4x8x2xf32> } // CHECK-LABEL: func.func @bubble_up_pack_multiple_dims_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x8x8x4x8x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack 
%[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1, 2] inner_tiles = [4, 8, 2] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x8x8x4x8x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3], [4], [5], [6]] @@ -1164,13 +1164,13 @@ func.func @bubble_up_pack_multiple_dims_through_expand(%arg0: tensor<32x64x16xf3 func.func @bubble_up_pack_inner_dims_reorder_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x4x16x4xf32> { %empty = tensor.empty() : tensor<4x2x4x16x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [2, 1] inner_tiles = [16, 4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x4x16x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [2, 1] inner_tiles = [16, 4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x4x16x4xf32> return %pack : tensor<4x2x4x16x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_inner_dims_reorder_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x4x16x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64xf32> -> tensor<8x4x16x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] @@ -1182,13 +1182,13 @@ func.func @bubble_up_pack_inner_dims_reorder_through_expand(%arg0: tensor<32x64x func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<4x2x2x8x16x4x4xf32> { %empty = tensor.empty() : tensor<4x2x2x8x16x4x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2, 3], [4]] output_shape [4, 8, 2, 32, 16] : tensor<32x64x16xf32> into tensor<4x8x2x32x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [1, 3] inner_tiles = [4, 4] into %empty : tensor<4x8x2x32x16xf32> -> tensor<4x2x2x8x16x4x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [1, 3] inner_tiles = [4, 4] into %empty : tensor<4x8x2x32x16xf32> -> tensor<4x2x2x8x16x4x4xf32> return %pack : tensor<4x2x2x8x16x4x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x16x16x4x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x16x16x4x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2, 3], [4], [5], [6]] @@ -1200,7 +1200,7 @@ func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand(%arg0: func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor<32x64xf32>) -> tensor<32x4x2x4x2xf32> { %empty = tensor.empty() : tensor<32x4x2x4x2xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32> + %pack = linalg.pack %expanded outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32> return %pack : tensor<32x4x2x4x2xf32> } // CHECK-LABEL: func.func 
@no_bubble_up_pack_outer_dims_permutation_through_expand( @@ -1208,7 +1208,7 @@ func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x4x2x4x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %[[EMPTY]] // CHECK-SAME: : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32> // CHECK: return %[[PACK]] : tensor<32x4x2x4x2xf32> @@ -1218,7 +1218,7 @@ func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: tensor<32x64xf32>) -> tensor<2x2x64x2x4xf32> { %empty = tensor.empty() : tensor<2x2x64x2x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32> return %pack : tensor<2x2x64x2x4xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand( @@ -1226,7 +1226,7 @@ func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: te // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x2x64x2x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %[[EMPTY]] // CHECK-SAME: : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32> // CHECK: return %[[PACK]] : tensor<2x2x64x2x4xf32> @@ -1236,7 +1236,7 @@ func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: te func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand(%arg0: tensor<32x64xf32>) -> tensor<2x8x64x2xf32> { %empty = tensor.empty() : tensor<2x8x64x2xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [2] into %empty : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [2] into %empty : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32> return %pack : tensor<2x8x64x2xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand( @@ -1244,7 +1244,7 @@ func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand(%arg0: te // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x8x64x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] into %[[EMPTY]] // CHECK-SAME: : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32> // CHECK: return %[[PACK]] : tensor<2x8x64x2xf32> @@ -1255,7 +1255,7 @@ func.func 
@no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( %cst = arith.constant 3.000000e+00 : f32 %empty = tensor.empty() : tensor<3x2x60x8xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [3, 10, 60] : tensor<30x60xf32> into tensor<3x10x60xf32> - %pack = tensor.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1] inner_tiles = [8] into %empty : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32> + %pack = linalg.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1] inner_tiles = [8] into %empty : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32> return %pack : tensor<3x2x60x8xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( @@ -1264,7 +1264,7 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x2x60x8xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [3, 10, 60] : tensor<30x60xf32> into tensor<3x10x60xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] padding_value(%[[CST]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] padding_value(%[[CST]] : f32) // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [8] into %[[EMPTY]] // CHECK-SAME: : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32> // CHECK: return %[[PACK]] : tensor<3x2x60x8xf32> @@ -1274,7 +1274,7 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate(%arg0: tensor<32x64xf32>) -> tensor<8x4x16x8xf32> { %empty = tensor.empty() : tensor<8x4x16x8xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [8] into %empty : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [8] into %empty : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> return %pack : tensor<8x4x16x8xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate( @@ -1282,7 +1282,7 @@ func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassocia // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x4x16x8xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1, 2]] // CHECK-SAME: output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] into %[[EMPTY]] // CHECK-SAME: : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> // CHECK: return %[[PACK]] : tensor<8x4x16x8xf32> @@ -1291,7 +1291,7 @@ func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassocia func.func @push_down_unpack_through_expand(%5: tensor, %dim: index, %sz0: index) -> tensor { %6 = tensor.empty(%dim) : tensor - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor + %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor into tensor func.return %expanded : tensor } @@ -1305,14 +1305,14 @@ func.func @push_down_unpack_through_expand(%5: tensor, %dim: index // CHECK: 
%[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]] output_shape [%[[SZ0]], 32, 32, 8, 8] : tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor // ----- func.func @push_down_unpack_through_expand_empty_outer_dims_perm(%5: tensor, %dim: index, %sz0: index) -> tensor { %6 = tensor.empty(%dim) : tensor - %unpack = tensor.unpack %5 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor + %unpack = linalg.unpack %5 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor into tensor func.return %expanded : tensor } @@ -1326,14 +1326,14 @@ func.func @push_down_unpack_through_expand_empty_outer_dims_perm(%5: tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor // ----- func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32>) -> tensor<4x12x256x256xf32> { %6 = tensor.empty() : tensor<4x3072x256xf32> - %unpack = tensor.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32> + %unpack = linalg.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32> %expanded = tensor.expand_shape %unpack [[0], [1, 2], [3]] output_shape [4, 12, 256, 256] : tensor<4x3072x256xf32> into tensor<4x12x256x256xf32> func.return %expanded : tensor<4x12x256x256xf32> } @@ -1341,14 +1341,14 @@ func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32> // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3], [4], [5]] output_shape [4, 32, 12, 32, 8, 8] : tensor<4x32x384x8x8xf32> into tensor<4x32x12x32x8x8xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x12x256x256xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32> // CHECK: return %[[UNPACK]] : tensor<4x12x256x256xf32> // ----- func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> tensor<3x16x1x256xf32> { %6 = tensor.empty() : tensor<48x256xf32> - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32> + %unpack =
linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32> %expanded = tensor.expand_shape %unpack [[0, 1, 2], [3]] output_shape [3, 16, 1, 256] : tensor<48x256xf32> into tensor<3x16x1x256xf32> func.return %expanded : tensor<3x16x1x256xf32> } @@ -1356,14 +1356,14 @@ func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1, 2], [3], [4], [5]] output_shape [3, 2, 1, 32, 8, 8] : tensor<6x32x8x8xf32> into tensor<3x2x1x32x8x8xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x16x1x256xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32> // CHECK: return %[[UNPACK]] : tensor<3x16x1x256xf32> // ----- func.func @push_down_unpack_through_expand_on_outer_dims(%5: tensor, %dim: index, %sz0: index) -> tensor { %6 = tensor.empty(%dim) : tensor - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %6 : tensor -> tensor + %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %6 : tensor -> tensor %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor into tensor func.return %expanded : tensor } @@ -1377,19 +1377,19 @@ func.func @push_down_unpack_through_expand_on_outer_dims(%5: tensor, // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3]] output_shape [%[[SZ0]], 256, 32, 8] : tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor // ----- func.func @no_push_down_unpack_through_non_divisible_expand(%5: tensor<384x32x8x8xf32>) -> tensor<256x12x256xf32> { %6 = tensor.empty() : tensor<3072x256xf32> - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32> + %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32> %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [256, 12, 256] : tensor<3072x256xf32> into tensor<256x12x256xf32> func.return %expanded : tensor<256x12x256xf32> } // CHECK-LABEL: func.func @no_push_down_unpack_through_non_divisible_expand // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[UNPACK]] {{\[}}[0, 1], [2]] output_shape [256, 12, 256] : tensor<3072x256xf32> into tensor<256x12x256xf32> // CHECK: return %[[EXPANDED]] : tensor<256x12x256xf32> diff --git
a/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir index ec761d9a49436..72fde5490a305 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir @@ -4,7 +4,7 @@ // RUN: -transform-interpreter %s | FileCheck %s func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8x32xf32>) -> tensor<1x1x4x8x8x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> return %0 : tensor<1x1x4x8x8x32xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> @@ -27,7 +27,7 @@ func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -36,7 +36,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %arg2: f32) -> tensor<2x8x8x2xf32> { - %0 = tensor.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> + %0 = linalg.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> return %0 : tensor<2x8x8x2xf32> } // CHECK: func.func @pad_and_pack @@ -54,7 +54,7 @@ func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %a module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -64,7 +64,7 @@ module attributes {transform.with_named_sequence} { func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> return %0 : tensor<32x4x32x8xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> @@ -85,7 +85,7 @@ func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) // CHECK-SAME: [%[[C]], %[[K]], 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x32x8xf32> into tensor<32x4x32x8xf32> module attributes {transform.with_named_sequence} { 
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir index 1cc1484ed4095..911b453f919c3 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir @@ -5,7 +5,7 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor, %arg1: tensor<1x1x?x1xi32>) -> tensor<1x1x?x1xi32> { %c8 = arith.constant 8 : index %c5 = arith.constant 5 : i32 - %pack = tensor.pack %arg0 padding_value(%c5 : i32) inner_dims_pos = [0, 1] inner_tiles = [%c8, 1] into %arg1 : tensor -> tensor<1x1x?x1xi32> + %pack = linalg.pack %arg0 padding_value(%c5 : i32) inner_dims_pos = [0, 1] inner_tiles = [%c8, 1] into %arg1 : tensor -> tensor<1x1x?x1xi32> return %pack : tensor<1x1x?x1xi32> } @@ -32,7 +32,7 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor, %arg1: tensor<1x1x?x1xi // ----- func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32> return %0 : tensor<1x1x8x2xf32> } // CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 - 5)> @@ -52,7 +52,7 @@ func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: te /// Same as example above, but with 1 dynamic tile size. 
func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32, %tile_dim_0: index) -> tensor<1x1x?x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> return %0 : tensor<1x1x?x2xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile( @@ -72,7 +72,7 @@ func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: te func.func @simple_pad_and_pack_dynamic_tile_cst(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32) -> tensor<1x1x?x2xf32> { %tile_dim_0 = arith.constant 8 : index - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> return %0 : tensor<1x1x?x2xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile_cst( @@ -86,7 +86,7 @@ func.func @simple_pad_and_pack_dynamic_tile_cst(%input: tensor<5x1xf32>, %output // CHECK: return %[[RES]] : tensor<1x1x?x2xf32> func.func @simple_pad_and_pack_dynamic_tile_transpose(%input: tensor<5x1xf32>, %output: tensor<1x1x2x?xf32>, %pad: f32, %tile_dim_1: index) -> tensor<1x1x2x?xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x2x?xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x2x?xf32> return %0 : tensor<1x1x2x?xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile_transpose( @@ -116,7 +116,7 @@ func.func @simple_pad_and_pack_scalable_tile(%input: tensor<5x1xf32>, %output: t %c8 = arith.constant 8 : index %vscale = vector.vscale %c8_vscale = arith.muli %vscale, %c8 : index - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> return %0 : tensor<1x1x?x2xf32> } @@ -138,7 +138,7 @@ func.func @simple_pad_and_pack_scalable_tile(%input: tensor<5x1xf32>, %output: t /// Same as example above, but with both tile sizes dynamic. 
func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x?x?xf32>, %pad: f32, %tile_dim_0: index, %tile_dim_1: index) -> tensor<1x1x?x?xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x?x?xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x?x?xf32> return %0 : tensor<1x1x?x?xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tiles( @@ -158,7 +158,7 @@ func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: t // ----- func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x1x5x1xf32>, %output: tensor<1x1x1x1x2x?xf32>, %pad: f32, %high: index) -> tensor<1x1x1x1x2x?xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x1x5x1xf32> -> tensor<1x1x1x1x2x?xf32> + %0 = linalg.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x1x5x1xf32> -> tensor<1x1x1x1x2x?xf32> return %0 : tensor<1x1x1x1x2x?xf32> } // CHECK: #[[$ATTR_2:.+]] = affine_map<()[s0] -> (s0 - 5)> @@ -183,7 +183,7 @@ func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x // ----- func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{ - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-LABEL: func.func @simple_NC_to_CNnc @@ -197,7 +197,7 @@ func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32 // ----- func.func @simple_CHW_to_CHWhwc(%arg0: tensor<3x5x7xf32>, %arg1: tensor<1x1x1x5x7x3xf32>) -> tensor<1x1x1x5x7x3xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [1, 2, 0] inner_tiles = [5, 7, 3] into %arg1 : tensor<3x5x7xf32> -> tensor<1x1x1x5x7x3xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1, 2, 0] inner_tiles = [5, 7, 3] into %arg1 : tensor<3x5x7xf32> -> tensor<1x1x1x5x7x3xf32> return %0 : tensor<1x1x1x5x7x3xf32> } // CHECK-LABEL: func.func @simple_CHW_to_CHWhwc @@ -215,7 +215,7 @@ func.func @simple_CHW_to_CHWhwc(%arg0: tensor<3x5x7xf32>, %arg1: tensor<1x1x1x5x // ----- func.func @simple_KCRS_to_KRSCsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x1x1x8x32xf32>) -> tensor<1x1x1x1x8x32xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> return %0 : tensor<1x1x1x1x8x32xf32> } // CHECK-LABEL: func.func @simple_KCRS_to_KRSCsr diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir index 0dbdf470bbfc9..03437223f0d45 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir @@ -4,13 +4,13 @@ // RUN: 
-transform-interpreter %s | FileCheck %s func.func @KCRSsr_to_KCRS(%arg0: tensor<1x1x4x8x8x32xf32>, %arg1: tensor<1x1x128x64xf32>) -> tensor<1x1x128x64xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x4x8x8x32xf32> -> tensor<1x1x128x64xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x4x8x8x32xf32> -> tensor<1x1x128x64xf32> return %0 : tensor<1x1x128x64xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -38,7 +38,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13x15xf32>) -> tensor<13x15xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> return %0 : tensor<13x15xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (-d0 + 13, 8)> @@ -70,7 +70,7 @@ func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [8, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -79,7 +79,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) -> tensor<128x256xf32> { - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> return %0 : tensor<128x256xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> @@ -102,7 +102,7 @@ func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir 
b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir index ba1f214952562..d460c506d6e18 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir @@ -3,7 +3,7 @@ // RUN: -transform-interpreter=entry-point=decompose_unpack %s | FileCheck %s func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-LABEL: func.func @simple_KCRSsr_to_KCRS @@ -22,7 +22,7 @@ func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor< // ----- func.func @simple_unpack_static_tiles(%input: tensor<1x1x8x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> { - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_static_tiles @@ -38,7 +38,7 @@ func.func @simple_unpack_static_tiles(%input: tensor<1x1x8x2xf32>, %output: tens /// Same as example above, but with 1 dynamic tile size. func.func @simple_unpack_dynamic_tile(%input: tensor<1x1x?x2xf32>, %output: tensor<5x1xf32>, %tile_dim: index) -> tensor<5x1xf32> { - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%tile_dim, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%tile_dim, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_dynamic_tile @@ -55,7 +55,7 @@ func.func @simple_unpack_dynamic_tile(%input: tensor<1x1x?x2xf32>, %output: tens /// Same as example above, but with 1 dynamic tile size and a transpose. func.func @simple_unpack_dynamic_tile_transpose(%src: tensor<1x1x2x?xf32>, %dest: tensor<5x1xf32>, %tile_dim: index) -> tensor<5x1xf32> { - %0 = tensor.unpack %src inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim] into %dest : tensor<1x1x2x?xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %src inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim] into %dest : tensor<1x1x2x?xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_dynamic_tile_transpose @@ -78,7 +78,7 @@ func.func @simple_unpack_scalable_tile(%input: tensor<1x1x?x2xf32>, %output: ten %c8 = arith.constant 8 : index %vscale = vector.vscale %c8_vscale = arith.muli %vscale, %c8 : index - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_scalable_tile @@ -97,7 +97,7 @@ func.func @simple_unpack_scalable_tile(%input: tensor<1x1x?x2xf32>, %output: ten // ----- func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32>) -> tensor<32x8xf32>{ - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32>
-> tensor<32x8xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32> return %0 : tensor<32x8xf32> } // CHECK-LABEL: func.func @simple_CNnc_to_NC @@ -112,7 +112,7 @@ func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32 // ----- func.func @simple_NCHWc_to_NCHW(%arg0: tensor<2x1x16x8x32xf32>, %arg1: tensor<2x32x16x8xf32>) -> tensor<2x32x16x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %arg1 : tensor<2x1x16x8x32xf32> -> tensor<2x32x16x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %arg1 : tensor<2x1x16x8x32xf32> -> tensor<2x32x16x8xf32> return %0 : tensor<2x32x16x8xf32> } // CHECK-LABEL: func.func @simple_NCHWc_to_NCHW @@ -131,7 +131,7 @@ func.func @simple_NCHWc_to_NCHW(%arg0: tensor<2x1x16x8x32xf32>, %arg1: tensor<2x // ----- func.func @simple_NHWC_to_NCHW(%arg0: tensor<1x16x8x32xf32>, %arg1: tensor<1x32x16x8xf32>) -> tensor<1x32x16x8xf32> { - %0 = tensor.unpack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [] inner_tiles = [] into %arg1 : tensor<1x16x8x32xf32> -> tensor<1x32x16x8xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [] inner_tiles = [] into %arg1 : tensor<1x16x8x32xf32> -> tensor<1x32x16x8xf32> return %0 : tensor<1x32x16x8xf32> } // CHECK-LABEL: func.func @simple_NHWC_to_NCHW @@ -150,7 +150,7 @@ func.func @simple_NHWC_to_NCHW(%arg0: tensor<1x16x8x32xf32>, %arg1: tensor<1x32x // ----- func.func @unpack_with_dynamic_dims(%arg0: tensor<?x1x1x1x8x32xf32>, %arg1: tensor<?x1x32x8xf32>) -> tensor<?x1x32x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<?x1x1x1x8x32xf32> -> tensor<?x1x32x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<?x1x1x1x8x32xf32> -> tensor<?x1x32x8xf32> return %0 : tensor<?x1x32x8xf32> } // CHECK-LABEL: func.func @unpack_with_dynamic_dims diff --git a/mlir/test/Dialect/Linalg/fold-empty-op.mlir b/mlir/test/Dialect/Linalg/fold-empty-op.mlir new file mode 100644 index 0000000000000..5ce19d7091318 --- /dev/null +++ b/mlir/test/Dialect/Linalg/fold-empty-op.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt -split-input-file -transform-interpreter %s | FileCheck %s + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func"> + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.fold_pack_unpack_into_empty + } : !transform.op<"func.func"> + transform.yield + } +} + +func.func @pack_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { + %empty_unpacked = tensor.empty() : tensor<256x256xf32> + %packed = linalg.pack %empty_unpacked + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> + return %packed : tensor<8x8x32x32xf32> +} + +// CHECK-LABEL: func.func @pack_empty( +// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> +// CHECK-NOT: linalg.pack +// CHECK: return %[[T]] : tensor<8x8x32x32xf32> + +func.func @pack_empty_dynamic(%arg0: tensor<?x?x32x32xf32>, %dim0: index, %dim1: index) -> tensor<?x?x32x32xf32> { + %empty_unpacked = tensor.empty(%dim0, %dim1) : tensor<?x?xf32> + %packed = linalg.pack %empty_unpacked + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<?x?xf32> -> tensor<?x?x32x32xf32> + return %packed : tensor<?x?x32x32xf32> +} + +// CHECK-LABEL: func.func @pack_empty_dynamic( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x32x32xf32>, +// CHECK-SAME:
%[[DIM0:[a-zA-Z0-9_]+]]: index, +// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index +// CHECK-NOT: linalg.pack +// CHECK: return %[[T]] : tensor<?x?x32x32xf32> + +func.func @unpack_empty(%arg0: tensor<256x256xf32>) -> tensor<256x256xf32> { + %empty_packed = tensor.empty() : tensor<8x8x32x32xf32> + %unpacked = linalg.unpack %empty_packed + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<8x8x32x32xf32> -> tensor<256x256xf32> + return %unpacked : tensor<256x256xf32> +} + +// CHECK-LABEL: func.func @unpack_empty( +// CHECK-SAME: %[[T:.+]]: tensor<256x256xf32> +// CHECK-NOT: linalg.unpack +// CHECK: return %[[T]] : tensor<256x256xf32> + +func.func @unpack_empty_dynamic(%arg0: tensor<?x?xf32>, %dim0: index, %dim1: index) -> tensor<?x?xf32> { + %empty_packed = tensor.empty(%dim0, %dim1) : tensor<?x?x32x32xf32> + %unpacked = linalg.unpack %empty_packed + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<?x?x32x32xf32> -> tensor<?x?xf32> + return %unpacked : tensor<?x?xf32> +} + +// CHECK-LABEL: func.func @unpack_empty_dynamic( +// CHECK-SAME: %[[T:.+]]: tensor<?x?xf32>, +// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index, +// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index +// CHECK-NOT: linalg.unpack +// CHECK: return %[[T]] : tensor<?x?xf32> + +func.func @pack_padded_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { + %pad = arith.constant 1.0 : f32 + %empty_unpacked = tensor.empty() : tensor<256x256xf32> + %packed = linalg.pack %empty_unpacked + padding_value(%pad : f32) + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> + return %packed : tensor<8x8x32x32xf32> +} + +// CHECK-LABEL: func.func @pack_padded_empty( +// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> +// CHECK: %[[PACK:.+]] = linalg.pack +// CHECK: return %[[PACK]] : tensor<8x8x32x32xf32> diff --git a/mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir similarity index 86% rename from mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir rename to mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir index f9e51ae52a74b..51350e5bc8498 100644 --- a/mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir +++ b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -test-tensor-transform-patterns="test-simplify-pack-unpack-patterns" %s | FileCheck %s +// RUN: mlir-opt -split-input-file -test-linalg-transform-patterns="test-simplify-pack-unpack-patterns" %s | FileCheck %s // CHECK-LABEL: func.func @single_dim_packing( // CHECK-SAME: %[[ARG0:.+]]: tensor<256xf32>) @@ -6,7 +6,7 @@ // CHECK: return %[[EXPANDED]] : tensor<8x32xf32> func.func @single_dim_packing(%arg0: tensor<256xf32>) -> tensor<8x32xf32> { %empty = tensor.empty() : tensor<8x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256xf32> -> tensor<8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256xf32> -> tensor<8x32xf32> return %0 : tensor<8x32xf32> } @@ -15,11 +15,11 @@ func.func @single_dim_packing(%arg0: tensor<256xf32>) -> tensor<8x32xf32> { // CHECK-LABEL: func.func @single_dim_packing_with_padding( // CHECK-SAME: %[[ARG0:.+]]: tensor<255xf32>) // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @single_dim_packing_with_padding(%arg0: tensor<255xf32>) -> tensor<8x32xf32> { %empty = tensor.empty() : tensor<8x32xf32> %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.pack %arg0 padding_value(%cst : f32) inner_dims_pos = [0] inner_tiles = [32] into %empty :
tensor<255xf32> -> tensor<8x32xf32> + %0 = linalg.pack %arg0 padding_value(%cst : f32) inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<255xf32> -> tensor<8x32xf32> return %0 : tensor<8x32xf32> } @@ -31,7 +31,7 @@ func.func @single_dim_packing_with_padding(%arg0: tensor<255xf32>) -> tensor<8x3 // CHECK: return %[[EXPANDED]] : tensor<5x8x32xf32> func.func @single_last_inner_dim_packing(%arg0: tensor<5x256xf32>) -> tensor<5x8x32xf32> { %empty = tensor.empty() : tensor<5x8x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> return %0 : tensor<5x8x32xf32> } @@ -43,7 +43,7 @@ func.func @single_last_inner_dim_packing(%arg0: tensor<5x256xf32>) -> tensor<5x8 // CHECK: return %[[EXPANDED]] : tensor<2x32xf32> func.func @pack_1d_with_outer_dims_perm(%arg0: tensor<64xf32>) -> tensor<2x32xf32> { %empty = tensor.empty() : tensor<2x32xf32> - %pack = tensor.pack %arg0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<64xf32> -> tensor<2x32xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<64xf32> -> tensor<2x32xf32> return %pack : tensor<2x32xf32> } @@ -55,7 +55,7 @@ func.func @pack_1d_with_outer_dims_perm(%arg0: tensor<64xf32>) -> tensor<2x32xf3 // CHECK: return %[[EXPANDED]] : tensor<5x8x32xf32> func.func @single_last_inner_dim_packing_with_identity_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<5x8x32xf32> { %empty = tensor.empty() : tensor<5x8x32xf32> - %0 = tensor.pack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> return %0 : tensor<5x8x32xf32> } @@ -63,10 +63,10 @@ func.func @single_last_inner_dim_packing_with_identity_outer_dims_perm(%arg0: te // CHECK-LABEL: func.func @packing_with_outer_dims_perm( // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @packing_with_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<8x5x32xf32> { %empty = tensor.empty() : tensor<8x5x32xf32> - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<8x5x32xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<8x5x32xf32> return %0 : tensor<8x5x32xf32> } @@ -74,10 +74,10 @@ func.func @packing_with_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<8x5x // CHECK-LABEL: func.func @single_first_inner_dim_packing( // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @single_first_inner_dim_packing(%arg0: tensor<256x5xf32>) -> tensor<8x5x32xf32> { %empty = tensor.empty() : tensor<8x5x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256x5xf32> -> tensor<8x5x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256x5xf32> -> tensor<8x5x32xf32> return %0 : tensor<8x5x32xf32> } @@ -89,7 +89,7 @@ func.func @single_first_inner_dim_packing(%arg0: tensor<256x5xf32>) -> tensor<8x // CHECK: return %[[EXPANDED]] func.func @pack_1x32_to_1x32x1x1(%arg0 : tensor<1x32xf32>) -> tensor<1x32x1x1xf32> { 
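  // Hedged note (an assumption for illustration, not this file's checked
  // output): with all-unit inner tiles and no transposition, the
  // simplification pattern is expected to rewrite this pack as a pure
  // reshape, e.g.
  //   %e = tensor.expand_shape %arg0 [[0], [1, 2, 3]] output_shape [1, 32, 1, 1]
  //       : tensor<1x32xf32> into tensor<1x32x1x1xf32>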
%empty = tensor.empty() : tensor<1x32x1x1xf32> - %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty + %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty : tensor<1x32xf32> -> tensor<1x32x1x1xf32> return %pack : tensor<1x32x1x1xf32> } @@ -102,7 +102,7 @@ func.func @pack_1x32_to_1x32x1x1(%arg0 : tensor<1x32xf32>) -> tensor<1x32x1x1xf3 // CHECK: return %[[EXPANDED]] func.func @pack_1x32_to_1x16x1x2(%arg0 : tensor<1x32xf32>) -> tensor<1x16x1x2xf32> { %empty = tensor.empty() : tensor<1x16x1x2xf32> - %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 2] into %empty + %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 2] into %empty : tensor<1x32xf32> -> tensor<1x16x1x2xf32> return %pack : tensor<1x16x1x2xf32> } @@ -115,7 +115,7 @@ func.func @pack_1x32_to_1x16x1x2(%arg0 : tensor<1x32xf32>) -> tensor<1x16x1x2xf3 // CHECK: return %[[EXPANDED]] func.func @pack_32x1_to_16x1x2x1(%arg0 : tensor<32x1xf32>) -> tensor<1x16x2x1xf32> { %empty = tensor.empty() : tensor<1x16x2x1xf32> - %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty + %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty : tensor<32x1xf32> -> tensor<1x16x2x1xf32> return %pack : tensor<1x16x2x1xf32> } @@ -124,10 +124,10 @@ func.func @pack_32x1_to_16x1x2x1(%arg0 : tensor<32x1xf32>) -> tensor<1x16x2x1xf3 // CHECK-LABEL: func.func @pack_32x1_to_16x1x1x2 // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @pack_32x1_to_16x1x1x2(%arg0 : tensor<32x1xf32>) -> tensor<16x1x1x2xf32> { %empty = tensor.empty() : tensor<16x1x1x2xf32> - %pack = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty + %pack = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty : tensor<32x1xf32> -> tensor<16x1x1x2xf32> return %pack : tensor<16x1x1x2xf32> } @@ -140,7 +140,7 @@ func.func @pack_32x1_to_16x1x1x2(%arg0 : tensor<32x1xf32>) -> tensor<16x1x1x2xf3 // CHECK: return %[[COLLAPSED]] func.func @unpack_1d_to_collapse(%arg0: tensor<8x32xf32>) -> tensor<256xf32> { %empty = tensor.empty() : tensor<256xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<256xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<256xf32> return %0 : tensor<256xf32> } @@ -148,10 +148,10 @@ func.func @unpack_1d_to_collapse(%arg0: tensor<8x32xf32>) -> tensor<256xf32> { // CHECK-LABEL: func.func @unpack_to_partial_slice // CHECK-NOT: tensor.collapse -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_to_partial_slice(%arg0: tensor<8x32xf32>) -> tensor<255xf32> { %empty = tensor.empty() : tensor<255xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<255xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<255xf32> return %0 : tensor<255xf32> } @@ -159,14 +159,14 @@ func.func @unpack_to_partial_slice(%arg0: tensor<8x32xf32>) -> tensor<255xf32> { // CHECK-LABEL: func.func @unpack_dynamic // CHECK-NOT: tensor.collapse -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_dynamic(%arg0: tensor<?x32xf32>) -> tensor<?xf32> { %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index %d0 = tensor.dim %arg0, %c0 : tensor<?x32xf32> %size = arith.muli %d0, %c32 : index %empty =
tensor.empty(%size) : tensor<?xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<?x32xf32> -> tensor<?xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<?x32xf32> -> tensor<?xf32> return %0 : tensor<?xf32> } @@ -178,7 +178,7 @@ func.func @unpack_dynamic(%arg0: tensor<?x32xf32>) -> tensor<?xf32> { // CHECK: return %[[COLLAPSED]] : tensor<5x256xf32> func.func @single_last_inner_dim_unpacking(%arg0: tensor<5x8x32xf32>) -> tensor<5x256xf32> { %empty = tensor.empty() : tensor<5x256xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> return %0 : tensor<5x256xf32> } @@ -190,7 +190,7 @@ func.func @single_last_inner_dim_unpacking(%arg0: tensor<5x8x32xf32>) -> tensor< // CHECK: return %[[COLLAPSED]] : tensor<5x256xf32> func.func @single_last_inner_dim_unpacking_with_identity_outer_dims_perm(%arg0: tensor<5x8x32xf32>) -> tensor<5x256xf32> { %empty = tensor.empty() : tensor<5x256xf32> - %0 = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> return %0 : tensor<5x256xf32> } @@ -198,10 +198,10 @@ func.func @single_last_inner_dim_unpacking_with_identity_outer_dims_perm(%arg0: // CHECK-LABEL: func.func @unpacking_with_outer_dims_perm( // CHECK-NOT: tensor.collapse_shape -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpacking_with_outer_dims_perm(%arg0: tensor<8x5x32xf32>) -> tensor<5x256xf32> { %empty = tensor.empty() : tensor<5x256xf32> - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<5x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<5x256xf32> return %0 : tensor<5x256xf32> } @@ -209,10 +209,10 @@ func.func @unpacking_with_outer_dims_perm(%arg0: tensor<8x5x32xf32>) -> tensor<5 // CHECK-LABEL: func.func @single_first_inner_dim_unpacking( // CHECK-NOT: tensor.collapse_shape -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @single_first_inner_dim_unpacking(%arg0: tensor<8x5x32xf32>) -> tensor<256x5xf32> { %empty = tensor.empty() : tensor<256x5xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<256x5xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<256x5xf32> return %0 : tensor<256x5xf32> } @@ -224,7 +224,7 @@ func.func @single_first_inner_dim_unpacking(%arg0: tensor<8x5x32xf32>) -> tensor // CHECK: return %[[COLLAPSED]] func.func @unpack_1x32x1x1_to_1x32(%arg0 : tensor<1x32x1x1xf32>) -> tensor<1x32xf32> { %empty = tensor.empty() : tensor<1x32xf32> - %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty + %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty : tensor<1x32x1x1xf32> -> tensor<1x32xf32> return %unpack : tensor<1x32xf32> } @@ -237,7 +237,7 @@ func.func @unpack_1x32x1x1_to_1x32(%arg0 : tensor<1x32x1x1xf32>) -> tensor<1x32x // CHECK: return %[[COLLAPSED]] func.func @unpack_1x2x1x16_to_1x32(%arg0 : tensor<1x2x1x16xf32>) -> tensor<1x32xf32> { %empty =
tensor.empty() : tensor<1x32xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 16] into %empty + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 16] into %empty : tensor<1x2x1x16xf32> -> tensor<1x32xf32> return %unpack : tensor<1x32xf32> } @@ -250,7 +250,7 @@ func.func @unpack_1x2x1x16_to_1x32(%arg0 : tensor<1x2x1x16xf32>) -> tensor<1x32x // CHECK: return %[[COLLAPSED]] func.func @unpack_16x1x2x1_to_32x1(%arg0 : tensor<1x16x2x1xf32>) -> tensor<32x1xf32> { %empty = tensor.empty() : tensor<32x1xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty + %unpack = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty : tensor<1x16x2x1xf32> -> tensor<32x1xf32> return %unpack : tensor<32x1xf32> } @@ -259,10 +259,10 @@ func.func @unpack_16x1x2x1_to_32x1(%arg0 : tensor<1x16x2x1xf32>) -> tensor<32x1x // CHECK-LABEL: func.func @unpack_16x1x1x2_to_32x1 // CHECK-NOT: tensor.collapse_shape -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_16x1x1x2_to_32x1(%arg0 : tensor<16x1x1x2xf32>) -> tensor<32x1xf32> { %empty = tensor.empty() : tensor<32x1xf32> - %unpack = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty + %unpack = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty : tensor<16x1x1x2xf32> -> tensor<32x1xf32> return %unpack : tensor<32x1xf32> } @@ -275,7 +275,7 @@ func.func @unpack_16x1x1x2_to_32x1(%arg0 : tensor<16x1x1x2xf32>) -> tensor<32x1x // CHECK: return %[[EXPANDED]] : tensor<1x1x32x64xf32> func.func @pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> { %empty = tensor.empty() : tensor<1x1x32x64xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> return %0 : tensor<1x1x32x64xf32> } @@ -287,7 +287,7 @@ func.func @pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> { // CHECK: return %[[EXPANDED]] : tensor<1x1x32x64xf32> func.func @pad_like_pack_with_outer_dims_perm(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> { %empty = tensor.empty() : tensor<1x1x32x64xf32> - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> return %0 : tensor<1x1x32x64xf32> } @@ -299,7 +299,7 @@ func.func @pad_like_pack_with_outer_dims_perm(%arg0: tensor<32x64xf32>) -> tenso // CHECK: return %[[EXPANDED]] : tensor<32x1x64xf32> func.func @inner_pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<32x1x64xf32> { %empty = tensor.empty() : tensor<32x1x64xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64xf32> -> tensor<32x1x64xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64xf32> -> tensor<32x1x64xf32> return %0 : tensor<32x1x64xf32> } @@ -309,11 +309,11 @@ func.func @inner_pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<32x1x64xf32> // CHECK-LABEL: func.func @pad_and_inner_dim_shuffle_pack( // CHECK-SAME: %[[ARG0:.+]]: tensor<32x64xf32>) // CHECK: %[[EMPTY:.+]] = 
tensor.empty() : tensor<1x1x64x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<1x1x64x32xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<1x1x64x32xf32> // CHECK: return %[[PACK]] : tensor<1x1x64x32xf32> func.func @pad_and_inner_dim_shuffle_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x64x32xf32> { %empty = tensor.empty() : tensor<1x1x64x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %empty : tensor<32x64xf32> -> tensor<1x1x64x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %empty : tensor<32x64xf32> -> tensor<1x1x64x32xf32> return %0 : tensor<1x1x64x32xf32> } @@ -323,11 +323,11 @@ func.func @pad_and_inner_dim_shuffle_pack(%arg0: tensor<32x64xf32>) -> tensor<1x // CHECK-LABEL: func.func @pad_like_pack_with_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<32x64x16xf32>) // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x1x16x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> // CHECK: return %[[PACK]] : tensor<32x1x16x64xf32> func.func @pad_like_pack_with_transpose(%arg0: tensor<32x64x16xf32>) -> tensor<32x1x16x64xf32> { %empty = tensor.empty() : tensor<32x1x16x64xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> return %0 : tensor<32x1x16x64xf32> } @@ -339,7 +339,7 @@ func.func @pad_like_pack_with_transpose(%arg0: tensor<32x64x16xf32>) -> tensor<3 // CHECK: return %[[COLLAPSED]] : tensor<32x64xf32> func.func @unpad_like_unpack(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> { %empty = tensor.empty() : tensor<32x64xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> return %0 : tensor<32x64xf32> } @@ -351,7 +351,7 @@ func.func @unpad_like_unpack(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> // CHECK: return %[[COLLAPSED]] : tensor<32x64xf32> func.func @unpad_like_unpack_with_outer_dims_perm(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> { %empty = tensor.empty() : tensor<32x64xf32> - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> return %0 : tensor<32x64xf32> } @@ -363,7 +363,7 @@ func.func @unpad_like_unpack_with_outer_dims_perm(%arg0: tensor<1x1x32x64xf32>) // CHECK: return %[[COLLAPSED]] : tensor<32x64xf32> func.func @inner_unpad_like_unpack(%arg0: tensor<32x1x64xf32>) -> tensor<32x64xf32> { %empty = tensor.empty() : tensor<32x64xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x64xf32> -> tensor<32x64xf32> + %0 = linalg.unpack 
%arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x64xf32> -> tensor<32x64xf32> return %0 : tensor<32x64xf32> } @@ -373,11 +373,11 @@ func.func @inner_unpad_like_unpack(%arg0: tensor<32x1x64xf32>) -> tensor<32x64xf // CHECK-LABEL: func.func @unpad_and_inner_dim_shuffle_pack( // CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x32x64xf32>) // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<64x32xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %[[EMPTY]] : tensor<1x1x32x64xf32> -> tensor<64x32xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %[[EMPTY]] : tensor<1x1x32x64xf32> -> tensor<64x32xf32> // CHECK: return %[[UNPACK]] : tensor<64x32xf32> func.func @unpad_and_inner_dim_shuffle_pack(%arg0: tensor<1x1x32x64xf32>) -> tensor<64x32xf32> { %empty = tensor.empty() : tensor<64x32xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<64x32xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<64x32xf32> return %0 : tensor<64x32xf32> } @@ -387,10 +387,10 @@ func.func @unpad_and_inner_dim_shuffle_pack(%arg0: tensor<1x1x32x64xf32>) -> ten // CHECK-LABEL: func.func @unpad_like_unpack_with_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<32x1x16x64xf32>) // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x64x16xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> // CHECK: return %[[UNPACK]] : tensor<32x64x16xf32> func.func @unpad_like_unpack_with_transpose(%arg0: tensor<32x1x16x64xf32>) -> tensor<32x64x16xf32> { %empty = tensor.empty() : tensor<32x64x16xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> return %0 : tensor<32x64x16xf32> } diff --git a/mlir/test/Dialect/Linalg/td/decompose-pack.mlir b/mlir/test/Dialect/Linalg/td/decompose-pack.mlir index 49c45e29d5a14..32054134266c7 100644 --- a/mlir/test/Dialect/Linalg/td/decompose-pack.mlir +++ b/mlir/test/Dialect/Linalg/td/decompose-pack.mlir @@ -1,6 +1,6 @@ module @transforms attributes { transform.with_named_sequence } { transform.named_sequence @decompose_pack(%module: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op + %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %pack {isolated_from_above} : (!transform.any_op) -> !transform.any_op transform.apply_patterns to %1 { diff --git a/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir b/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir index 11243634262e0..f5b8403af5e58 100644 --- a/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir +++ b/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir @@ -1,6 +1,6 @@ module @transforms attributes { transform.with_named_sequence } { transform.named_sequence @decompose_unpack(%module: !transform.any_op {transform.readonly}) { - %pack = 
transform.structured.match ops{["tensor.unpack"]} in %module : (!transform.any_op) -> !transform.any_op + %pack = transform.structured.match ops{["linalg.unpack"]} in %module : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %pack {isolated_from_above} : (!transform.any_op) -> !transform.any_op transform.apply_patterns to %1 { diff --git a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir index 5f8ff36a16578..81fd7a8a947d7 100644 --- a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir +++ b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir @@ -4,7 +4,7 @@ func.func @pack(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<17x2x16x16x32x8xf32>) -> tensor<17x2x16x16x32x8xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose + // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose // CHECK: tensor.pad {{.*}} low[0, 0, 0, 0] // CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32> // CHECK: tensor.expand_shape %{{.*}} [{{.*}}[0, 1], [2, 3], [4], [5]] @@ -13,16 +13,16 @@ func.func @pack(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<17x2x16x16x32x8xf // CHECK-SAME: ins(%{{.*}} : tensor<17x8x2x32x16x16xf32>) // CHECK-SAME: outs(%{{.*}} : tensor<17x2x16x16x32x8xf32>) // CHECK-SAME: permutation = [0, 2, 4, 5, 3, 1] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<129x47x16x16xf32> -> tensor<17x2x16x16x32x8xf32> return %pack : tensor<17x2x16x16x32x8xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -33,7 +33,7 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @pack( func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor<8x8x16x1xf32> { - // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose + // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose // CHECK: tensor.pad {{.*}} low[0, 0] // CHECK: : tensor<128x8xf32> to tensor<128x8xf32> // CHECK: tensor.expand_shape %{{.*}} [{{.*}}[0, 1], [2, 3]] @@ -43,7 +43,7 @@ func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor // CHECK-SAME: outs(%{{.*}} : tensor<8x8x16x1xf32>) // CHECK-SAME: permutation = [0, 2, 1, 3] - %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %arg1 + %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %arg1 : tensor<128x8xf32> -> tensor<8x8x16x1xf32> return %pack : tensor<8x8x16x1xf32> @@ -51,9 +51,9 @@ func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence 
@__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -67,7 +67,7 @@ module attributes {transform.with_named_sequence} { func.func @pack_as_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.insert_slice + // linalg.pack is lowered to tensor.pad + tensor.insert_slice // CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0, 0, 0] high[7, 17, 0, 0] // CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32> // CHECK: %[[RES:.*]] = tensor.insert_slice %[[PAD]] into %[[OUT]] @@ -79,16 +79,16 @@ func.func @pack_as_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x13 // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<1x1x1x1x136x64x16x16xf32> // CHECK: return %[[RES]] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<129x47x16x16xf32> -> tensor<1x1x1x1x136x64x16x16xf32> return %pack : tensor<1x1x1x1x136x64x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -101,22 +101,22 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @pack_as_pad_disabled_insert_slice( func.func @pack_as_pad_disabled_insert_slice(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose + // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose // CHECK-SAME: %[[ARG0:[^:]*]]: tensor<129x47x16x16xf32> // CHECK-DAG: %[[PAD:.*]] = tensor.pad %[[ARG0]] // CHECK-NOT: %[[RES:.*]] = tensor.insert_slice %[[PAD]] // CHECK: %[[PAD_EXPANDED:.*]] = tensor.expand_shape %[[PAD]] // CHECK-DAG: %[[RES:.*]] = linalg.transpose ins(%[[PAD_EXPANDED]] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<129x47x16x16xf32> -> 
tensor<1x1x1x1x136x64x16x16xf32> return %pack : tensor<1x1x1x1x136x64x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}: (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}: (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -141,16 +141,16 @@ func.func @pack_not_a_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x16x // CHECK-SAME: outs(%{{.*}} : tensor<1x1x16x16x136x64xf32>) // CHECK-SAME: permutation = [0, 2, 4, 5, 1, 3] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1] inner_tiles = [136, 64] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1] inner_tiles = [136, 64] into %arg1 : tensor<129x47x16x16xf32> -> tensor<1x1x16x16x136x64xf32> return %pack : tensor<1x1x16x16x136x64xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -172,16 +172,16 @@ func.func @unpack(%arg0: tensor<17x2x16x16x32x8xf32>, %arg1: tensor<129x47x16x16 // CHECK-SAME: : tensor<136x64x16x16xf32> to tensor<129x47x16x16xf32> // CHECK: linalg.copy ins(%[[SLICE]] : tensor<129x47x16x16xf32>) // CHECK-SAME: outs(%[[ARG1]] : tensor<129x47x16x16xf32>) - %unpack = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 + %unpack = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<17x2x16x16x32x8xf32> -> tensor<129x47x16x16xf32> return %unpack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -207,16 +207,16 @@ func.func @unpack_with_identity_outer_dims_perm(%arg0: tensor<17x2x16x16x32x8xf3 // CHECK-SAME: : tensor<136x64x16x16xf32> to tensor<129x47x16x16xf32> // CHECK: linalg.copy ins(%[[SLICE]] : tensor<129x47x16x16xf32>) // 
CHECK-SAME: outs(%[[ARG1]] : tensor<129x47x16x16xf32>) - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<17x2x16x16x32x8xf32> -> tensor<129x47x16x16xf32> return %unpack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -241,16 +241,16 @@ func.func @unpack_as_pad(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor< // strides multipliers. // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<1x1x1x1x136x64x16x16xf32> to tensor<129x47x16x16xf32> - %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<1x1x1x1x136x64x16x16xf32> -> tensor<129x47x16x16xf32> return %pack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -267,22 +267,22 @@ module attributes {transform.with_named_sequence} { func.func @unpack_as_pad_disabled_extract_slice(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor<129x47x16x16xf32>) -> tensor<129x47x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.unpack is lowered to tensor.extract_slice + linalg.transpose + tensor.collapse_shape + // linalg.unpack is lowered to linalg.transpose + tensor.collapse_shape + tensor.extract_slice // CHECK-DAG: %[[ARG0:[^:]*]]: tensor<1x1x1x1x136x64x16x16xf32> // CHECK-NOT: %[[RES:.*]] = tensor.extract_slice %[[ARG0]] // CHECK: %[[TRANSPOSED:.*]] = linalg.transpose ins(%[[ARG0]] // CHECK: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[TRANSPOSED]] // CHECK-DAG: %[[RES:.*]] = tensor.extract_slice %[[COLLAPSED]] - %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<1x1x1x1x136x64x16x16xf32> -> tensor<129x47x16x16xf32> return %pack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { -
%unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false}: (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false}: (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -305,7 +305,7 @@ func.func @pack_with_outer_dims_perm(%src: tensor<100x200x128x256xi32>, // CHECK-SAME: ins(%{{.*}} : tensor<100x200x4x32x16x16xi32>) // CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>) // CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3] - %0 = tensor.pack %src + %0 = linalg.pack %src outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] inner_tiles = [16, 32] @@ -315,9 +315,9 @@ func.func @pack_with_outer_dims_perm(%src: tensor<100x200x128x256xi32>, module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -337,7 +337,7 @@ func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x16x16x1xf3 // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<265x16x16x1xf32>) // CHECK-SAME: permutation = [0, 2, 1, 3] %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.pack %src + %0 = linalg.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %dest @@ -347,9 +347,9 @@ func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x16x16x1xf3 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -370,7 +370,7 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, // CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>) // CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3] %cst_0 = arith.constant 0 : i32 - %0 = tensor.pack %src + %0 = linalg.pack %src padding_value(%cst_0 : i32) outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] @@ -381,9 +381,9 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - 
%pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -370,7 +370,7 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, // CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>) // CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3] %cst_0 = arith.constant 0 : i32 - %0 = tensor.pack %src + %0 = linalg.pack %src padding_value(%cst_0 : i32) outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] @@ -381,9 +381,9 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -429,7 +429,7 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims(%source: tensor - %pack = tensor.pack %source padding_value(%padding_value : f32) + %pack = linalg.pack %source padding_value(%padding_value : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<?x?xf32> -> tensor<?x?x16x32xf32> return %pack : tensor<?x?x16x32xf32> } @@ -437,9 +437,9 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims(%source: tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -453,7 +453,7 @@ module attributes {transform.with_named_sequence} { func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.insert_slice + // linalg.pack is lowered to tensor.pad + tensor.insert_slice // CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0, 0, 0] high[7, 17, 0, 0] // CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32> // CHECK: %[[RES:.*]] = tensor.insert_slice %[[PAD]] into %[[OUT]] @@ -465,7 +465,7 @@ func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %ar // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<1x1x1x1x136x64x16x16xf32> // CHECK: return %[[RES]] - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [0, 1, 2, 3] @@ -476,9 +476,9 @@ func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %ar module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -501,7 +501,7 @@ module attributes {transform.with_named_sequence} { func.func @pack_as_pad_with_unit_dims(%arg0: tensor<3x1x1x1xf32>, %arg1: tensor< // CHECK-SAME: outs(%[[OUT]] : tensor<1x1x1x1x8x1xf32>) // CHECK-SAME: permutation = [0, 2, 4, 5, 1, 3] // CHECK: return %[[TRANSPOSED]] : tensor<1x1x1x1x8x1xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 padding_value(%zero : f32) inner_dims_pos = [0, 1] inner_tiles =
[8, 1] into %arg1 : tensor<3x1x1x1xf32> -> tensor<1x1x1x1x8x1xf32> @@ -512,9 +512,9 @@ func.func @pack_as_pad_with_unit_dims(%arg0: tensor<3x1x1x1xf32>, %arg1: tensor< module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -541,16 +541,16 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] : tensor<32x?x?xf32>) // CHECK-SAME: outs(%[[ARG1]] : tensor<32x?x?xf32>) func.func @unpack_with_dynamic_dest(%arg0: tensor<32x2x49x16x16xf32>, %arg1: tensor<32x?x?xf32>) -> tensor<32x?x?xf32> { - %pack = tensor.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %arg1 : tensor<32x2x49x16x16xf32> -> tensor<32x?x?xf32> return %pack : tensor<32x?x?xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -582,15 +582,15 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] : tensor) // CHECK-SAME: outs(%[[ARG1]] : tensor) func.func @unpack_with_dynamic_input_dest(%arg0: tensor, %arg1: tensor) -> tensor { - %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %arg1 : tensor -> tensor + %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %arg1 : tensor -> tensor return %unpack : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -626,14 +626,14 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] : tensor) // CHECK-SAME: outs(%[[ARG1]] : tensor) func.func @unpack_fully_dynamic(%source: tensor, %dest: tensor, %tile_n : index, %tile_m : index) -> tensor { - %0 = 
tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor -> tensor + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor -> tensor return %0 : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -664,16 +664,16 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<1x1x1x1x136x64x16x16xf32> to tensor func.func @unpack_as_pad_dynamic(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor) -> tensor { - %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<1x1x1x1x136x64x16x16xf32> -> tensor return %pack : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -698,16 +698,16 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] // CHECK-SAME: : tensor<32x64xf32>) outs(%[[ARG0]] : tensor<32x64xf32>) -> tensor<32x64xf32> func.func @unpack_with_outer_dims_perm(%arg0: tensor<32x64xf32>, %arg1: tensor<2x4x32x8xf32>) -> tensor<32x64xf32> { - %unpack = tensor.unpack %arg1 outer_dims_perm = [1, 0] + %unpack = linalg.unpack %arg1 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg0 : tensor<2x4x32x8xf32> -> tensor<32x64xf32> return %unpack : tensor<32x64xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir index 
ac1ca9319d335..20019424e8d3c 100644 --- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir @@ -106,12 +106,12 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @unpack_elemwise // CHECK: %[[RES:.*]] = scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.elemwise_unary // CHECK: return %[[RES]] func.func @unpack_elemwise(%arg0: tensor<16x48x8x8xf32>, %arg1: tensor<128x384xf32>) -> tensor<128x384xf32> { %0 = tensor.empty() : tensor<128x384xf32> - %1 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 + %1 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<16x48x8x8xf32> -> tensor<128x384xf32> %2 = linalg.elemwise_unary ins(%1: tensor<128x384xf32>) outs(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32> @@ -132,12 +132,12 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @pack_elemwise // CHECK: %[[RES:.*]] = scf.for // CHECK: scf.for -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: linalg.elemwise_unary // CHECK: return %[[RES]] func.func @pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> { %0 = tensor.empty() : tensor<16x48x8x8xf32> - %1 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 + %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<128x384xf32> -> tensor<16x48x8x8xf32> %2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>) outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> @@ -156,14 +156,14 @@ module attributes {transform.with_named_sequence} { // ----- // CHECK-LABEL: func.func @nofuse_pack_elemwise -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: %[[RES:.*]] = scf.for // CHECK: scf.for // CHECK: linalg.elemwise_unary // CHECK: return %[[RES]] func.func @nofuse_pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> { %0 = tensor.empty() : tensor<16x48x8x8xf32> - %1 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 + %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<128x384xf32> -> tensor<16x48x8x8xf32> %2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>) outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-pack.mlir b/mlir/test/Dialect/Linalg/transform-op-pack.mlir index 6c26ebd0a5b84..b3ad73e8df8e7 100644 --- a/mlir/test/Dialect/Linalg/transform-op-pack.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-pack.mlir @@ -15,9 +15,9 @@ // CHECK-SAME: %[[T1:.+]]: tensor<3xf16> func.func @reduction_2d_static(%t0: tensor<3x7xf16>, %t1: tensor<3xf16>) -> tensor<3xf16> { // CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<3x2x4xf16> - // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] : tensor<3x7xf16> -> tensor<3x2x4xf16> - // CHECK-NOT: tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"] @@ -29,7 +29,7 @@ func.func @reduction_2d_static(%t0: tensor<3x7xf16>, %t1: tensor<3xf16>) -> tens linalg.yield %3 : f16 } -> tensor<3xf16> - // CHECK-NOT: tensor.unpack + // CHECK-NOT: 
linalg.unpack return %2 : tensor<3xf16> } @@ -59,9 +59,9 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[T1:.+]]: tensor<3xf16> func.func @col_reduction_2d_static(%t0: tensor<7x3xf16>, %t1: tensor<3xf16>) -> tensor<3xf16> { // CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<3x2x4xf16> - // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] : tensor<7x3xf16> -> tensor<3x2x4xf16> - // CHECK-NOT: tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["reduction", "parallel", "reduction"] @@ -73,7 +73,7 @@ func.func @col_reduction_2d_static(%t0: tensor<7x3xf16>, %t1: tensor<3xf16>) -> linalg.yield %3 : f16 } -> tensor<3xf16> - // CHECK-NOT: tensor.unpack + // CHECK-NOT: linalg.unpack return %2 : tensor<3xf16> } @@ -83,12 +83,12 @@ module attributes {transform.with_named_sequence} { %1 = transform.structured.pack %0 packed_sizes = [4, 0] : (!transform.any_op) -> (!transform.op<"linalg.generic">) %pack = transform.get_producer_of_operand %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">) %2, %pack_2, %empty_unpack_2 = transform.structured.pack_transpose %pack with_compute_op(%1) outer_perm = [1, 0] - : (!transform.op<"tensor.pack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op) + : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op) transform.yield } } @@ -116,9 +116,9 @@ func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> ten // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[T0]], %[[C1]] : tensor // CHECK: %[[D1B4:.*]] = affine.apply #[[$DIV4]]()[%[[D1]]] // CHECK: %[[EMPTY:.*]] = tensor.empty(%[[D0]], %[[D1B4]]) : tensor - // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] : tensor -> tensor - // CHECK-NOT: tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"] @@ -130,7 +130,7 @@ func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> ten linalg.yield %3 : f16 } -> tensor - // CHECK-NOT: tensor.unpack + // CHECK-NOT: linalg.unpack return %2 : tensor } @@ -162,11 +162,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[T0:.+]]: tensor, // CHECK-SAME: %[[T1:.+]]: tensor func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> tensor { - // CHECK: %[[PACKED_0:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED_0:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 4] into %{{.*}} : tensor -> tensor - // CHECK: %[[PACKED_1:.*]] = tensor.pack %[[T1]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED_1:.*]] = linalg.pack %[[T1]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor -> tensor - // CHECK-NOT: 
tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel", "reduction"] @@ -178,7 +178,7 @@ func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> ten linalg.yield %3 : f16 } -> tensor - // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor -> tensor + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor -> tensor return %2 : tensor } @@ -207,11 +207,11 @@ module attributes {transform.with_named_sequence} { func.func @matmul(%A: tensor, %B: tensor, %C: tensor) -> tensor { - // CHECK: %[[PACK_A:.*]] = tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [2, 4] + // CHECK: %[[PACK_A:.*]] = linalg.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [2, 4] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_B:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [3, 4] + // CHECK: %[[PACK_B:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [3, 4] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_C:.*]] = tensor.pack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] + // CHECK: %[[PACK_C:.*]] = linalg.pack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] // CHECK-SAME: : tensor -> tensor // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] @@ -222,7 +222,7 @@ func.func @matmul(%A: tensor, %B: tensor, %C: tensor) outs(%C: tensor) -> tensor - // CHECK: tensor.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] + // CHECK: linalg.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] // CHECK-SAME: : tensor -> tensor return %0 : tensor } @@ -235,12 +235,12 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> (!transform.op<"linalg.generic">) %unpack = transform.get_consumers_of_result %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">) %2, %pack_2, %unpack_2 = transform.structured.pack_transpose %unpack with_compute_op(%1) outer_perm = [1, 0] inner_perm = [1, 0] - : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">) transform.yield } } @@ -259,11 +259,11 @@ module attributes {transform.with_named_sequence} { func.func @conv_2d_nchw_fchw(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1x1xf32>, %o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32> { - // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [8] + // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [8] // CHECK-SAME: : tensor<14x512x28x28xf32> -> tensor<14x64x28x28x8xf32> - // CHECK: %[[PACK_FILTER:.*]] = tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] + // CHECK: %[[PACK_FILTER:.*]] = linalg.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] // CHECK-SAME: : tensor<1024x512x1x1xf32> -> tensor<256x64x1x1x4x8xf32> - // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1] inner_tiles = 
[4] + // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [4] // CHECK-SAME: : tensor<14x1024x28x28xf32> -> tensor<14x256x28x28x4xf32> // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "reduction"]} @@ -272,7 +272,7 @@ func.func @conv_2d_nchw_fchw(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1 %0 = linalg.conv_2d_nchw_fchw ins(%i, %f: tensor<14x512x28x28xf32>, tensor<1024x512x1x1xf32>) outs(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32> - // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4] + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4] // CHECK-SAME: : tensor<14x256x28x28x4xf32> -> tensor<14x1024x28x28xf32> return %0: tensor<14x1024x28x28xf32> } @@ -300,11 +300,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[INIT:.+]]: tensor func.func @conv_2d_nhwc_hwcf(%input: tensor, %filter: tensor<1x?x?x?xf32>, %init: tensor) -> tensor { - // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [6] + // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [6] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_FILTER:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3, 2] inner_tiles = [4, 6] + // CHECK: %[[PACK_FILTER:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3, 2] inner_tiles = [4, 6] // CHECK-SAME: : tensor<1x?x?x?xf32> -> tensor<1x?x?x?x4x6xf32> - // CHECK: %[[PACK_OUTPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] + // CHECK: %[[PACK_OUTPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] // CHECK-SAME: : tensor -> tensor // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] @@ -315,7 +315,7 @@ func.func @conv_2d_nhwc_hwcf(%input: tensor, %filter: tensor<1x?x?x ins (%input, %filter: tensor, tensor<1x?x?x?xf32>) outs (%init: tensor) -> tensor - // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] // CHECK-SAME: : tensor -> tensor return %0 : tensor } @@ -349,11 +349,11 @@ func.func @matmul_dynamic_pack_size(%A: tensor, %B: tensor, %C // CHECK: %[[TS:.*]] = "some_tile_size"() : () -> index %sz = "some_tile_size"() : () -> (index) - // CHECK: %[[PACK_A:.*]] = tensor.pack %[[A]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] + // CHECK: %[[PACK_A:.*]] = linalg.pack %[[A]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_B:.*]] = tensor.pack %[[B]] {{.*}} inner_dims_pos = [1, 0] inner_tiles = [%[[TS]], %[[TS]]] + // CHECK: %[[PACK_B:.*]] = linalg.pack %[[B]] {{.*}} inner_dims_pos = [1, 0] inner_tiles = [%[[TS]], %[[TS]]] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_C:.*]] = tensor.pack %[[C]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] + // CHECK: %[[PACK_C:.*]] = linalg.pack %[[C]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] // CHECK-SAME: : tensor -> tensor // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "reduction"]} @@ -363,7 +363,7 @@ func.func @matmul_dynamic_pack_size(%A: tensor, %B: tensor, %C outs(%C: tensor) -> tensor - // CHECK: 
tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]] + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]] // CHECK-SAME: : tensor -> tensor return %0 : tensor } @@ -445,16 +445,16 @@ module attributes {transform.with_named_sequence} { // ----- func.func @no_single_packing_op(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> - %1 = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> - %2 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %1 = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> + %2 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> return } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{requires target to map to exactly 1 packing op and 1 packed op (got 2 and 1)}} transform.structured.pack_transpose %0 with_compute_op(%1) inner_perm = [0] @@ -476,7 +476,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!transform.any_op) -> !transform.any_op - // expected-error @below {{requires target to map to a tensor.pack or tensor.unpack}} + // expected-error @below {{requires target to map to a linalg.pack or linalg.unpack}} transform.structured.pack_transpose %0 with_compute_op(%1) inner_perm = [0] : (!transform.any_op, !transform.any_op) @@ -488,14 +488,14 @@ module attributes {transform.with_named_sequence} { // ----- func.func @no_linalg_target(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> %1 = arith.constant 0 : index return } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["arith.constant"]} in %arg1 : 
(!transform.any_op) -> !transform.any_op // expected-error @below {{requires a LinalgOp target}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -509,7 +509,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @no_single_use_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> %f0 = arith.constant 0.0 : f32 %1 = tensor.empty() : tensor %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor @@ -518,7 +518,7 @@ func.func @no_single_use_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{not a single use by the LinalgOp target}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -532,8 +532,8 @@ module attributes {transform.with_named_sequence} { // ----- func.func @not_produced_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %a = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> - %b = tensor.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> + %a = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %b = linalg.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> %f0 = arith.constant 0.0 : f32 %1 = tensor.empty() : tensor %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor @@ -542,7 +542,7 @@ func.func @not_produced_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{not produced by the LinalgOp target}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -559,13 +559,13 @@ func.func @no_matching_pack(%source: tensor<16xf32>) { %f0 = arith.constant 0.0 : f32 %1 = tensor.empty() : tensor<4x4xf32> %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor<4x4xf32>) -> tensor<4x4xf32> - %b = tensor.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32> + %b = linalg.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32> return } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = 
transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{could not find matching pack op}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -593,13 +593,13 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> (!transform.op<"linalg.generic">) %unpack = transform.get_consumers_of_result %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">) %2, %pack_2, %unpack_2 = // expected-error @below {{invalid outer_perm}} transform.structured.pack_transpose %unpack with_compute_op(%1) outer_perm = [1] - : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">) transform.yield } } @@ -621,13 +621,13 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> (!transform.op<"linalg.generic">) %unpack = transform.get_consumers_of_result %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">) %2, %pack_2, %unpack_2 = // expected-error @below {{invalid inner_perm}} transform.structured.pack_transpose %unpack with_compute_op(%1) inner_perm = [1] - : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">) transform.yield } } @@ -643,12 +643,12 @@ func.func @no_padding_on_packs(%A: tensor<32x32xf32>, %B: tensor<32x32xf32>, %C: } // CHECK-LABEL: no_padding_on_packs -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<8x4x4x8xf32> -// CHECK: tensor.pack %{{.+}} outer_dims_perm = [1, 0] +// CHECK: linalg.pack %{{.+}} outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 8] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x4x8x8xf32> -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<8x4x4x8xf32> module attributes {transform.with_named_sequence} { @@ -657,12 +657,12 @@ module attributes {transform.with_named_sequence} { %1 = transform.structured.pack %0 packed_sizes = [4, 8, 8] : (!transform.any_op) -> (!transform.op<"linalg.generic">) %pack = transform.get_producer_of_operand %1[1] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">) %2, %pack_2, %empty_unpack_2 = transform.structured.pack_transpose %pack with_compute_op(%1) outer_perm = [1, 0] inner_perm = [1, 0] - : 
(!transform.op<"tensor.pack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op) + : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op) transform.yield } } diff --git a/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir b/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir new file mode 100644 index 0000000000000..456a5ea453963 --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir @@ -0,0 +1,491 @@ +// RUN: mlir-opt %s -transform-interpreter -canonicalize -cse -split-input-file | FileCheck %s + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> +// CHECK: func.func @NC_to_NCnc +// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>, +// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) { +// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) { +// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [64, 128] [1, 1] : tensor<128x256xf32> to tensor<64x128xf32> +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32> +// CHECK: %[[SUB_RES:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32> +// CHECK: } +// CHECK: scf.yield %[[RES1:.*]] : tensor<4x8x32x32xf32> +// CHECK: } +// CHECK: return %[[RES0:.*]] : tensor<4x8x32x32xf32> +// CHECK: } +func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { + %0 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32> + return %0 : tensor<4x8x32x32xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)> +// CHECK: func.func @KC_to_CKkc +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index +// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index +// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]] +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) +// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK-SAME: [0, %[[IN_C]]] 
[128, 16] +// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8] +// CHECK: linalg.pack +// CHECK-SAME: %[[INPUT_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] +// CHECK-SAME: into %[[OUTPUT_SLICE]] +func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> + return %0 : tensor<32x4x32x8xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)> +// CHECK: func.func @pad_and_pack_static( +// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>, +// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>, +// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) { +// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]]) +// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]]) +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1] +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1] +// CHECK: %[[SUB_RES:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] +// CHECK-SAME: into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32> +// CHECK: } +// CHECK: return %[[RES0:.*]] : tensor<2x8x8x2xf32> +// CHECK: } +func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> { + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32> + return %0 : tensor<2x8x8x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)> +// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)> +// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)> +// 
CHECK: func.func @pad_and_pack_partially_dynamic( +// CHECK-SAME: %[[IN:.*]]: tensor, +// CHECK-SAME: %[[OUT:.*]]: tensor, +// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor +// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor +// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor) { +// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor) { +// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] +// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] +// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]]) +// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]] +// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]]) +// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]] +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor to tensor +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor to tensor +// CHECK: %[[SUB_RES:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] +// CHECK-SAME: into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor +// CHECK: } +// CHECK: scf.yield %[[RES1:.*]] : tensor +// CHECK: } +// CHECK: return %[[VAL_34:.*]] : tensor +// CHECK: } +func.func @pad_and_pack_partially_dynamic(%input: tensor, %output: tensor, %pad: f32) -> tensor { + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor -> tensor + return %0 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)> +// CHECK: func.func @pad_and_pack_fully_dynamic( +// CHECK-SAME: %[[IN:.*]]: tensor, +// CHECK-SAME: %[[OUT:.*]]: tensor, +// CHECK-SAME: %[[PAD:.*]]: f32, +// CHECK-SAME: %[[TILE_0:.*]]: index, +// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor +// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor +// CHECK: %[[RES0:.*]] = scf.for 
%[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor) { +// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor) { +// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] +// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] +// CHECK-DAG: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]] +// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]] +// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]] +// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]] +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor to tensor +// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[ITER1]], %[[C2]] +// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[ITER1]], %[[C3]] +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor to tensor +// CHECK: %[[PACK:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]] +// CHECK-SAME: into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor +// CHECK: } +// CHECK: scf.yield %[[RES1:.*]] : tensor +// CHECK: } +// CHECK: return %[[RES0:.*]] : tensor +// CHECK: } +func.func @pad_and_pack_fully_dynamic(%source: tensor, %dest: tensor, %pad: f32, %tile_n : index, %tile_m : index) -> tensor { + %0 = linalg.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor -> tensor + return %0 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> +// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)> +// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)> +// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)> +// CHECK: func.func @NCnc_to_NC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index +// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index +// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]] +// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]]) +// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]]) +// 
CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]]) +// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]]) +// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]]) +// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]]) +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16] +// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor +// CHECK: %[[EMPTY:.+]] = tensor.empty +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[EMPTY]] +// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] +// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] +// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4] +// CHECK: scf.yield %[[RES]] +func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + return %0 : tensor<256x128xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> +// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 8)> +// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 8)> +// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 8 - d0 floordiv 8 + 1)> +// CHECK: func.func @CKkc_to_KC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index +// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index +// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C128]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C256]] step %[[C4]] +// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) +// CHECK-DAG: %[[OFFSET_K:.+]] = affine.apply #[[MAP1]](%[[K]]) +// CHECK-DAG: %[[IN_K_SZ:.+]] = affine.apply #[[MAP2]](%[[K]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP4]](%[[C]]) +// CHECK-DAG: %[[OFFSET_C:.+]] = affine.apply #[[MAP5]](%[[C]]) +// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP6]](%[[C]]) +// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], %[[IN_K_SZ]], 32, 8] +// CHECK: %[[EMPTY:.+]] = tensor.empty +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] +// CHECK-SAME: into %[[EMPTY]] +// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] +// CHECK-SAME: [%[[OFFSET_K]], %[[OFFSET_C]]] [2, 4] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] +// CHECK-SAME: into %{{.+}}[%[[K]], 
%[[C]]] [2, 4] +// CHECK: scf.yield %[[RES]] +func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>) -> tensor<128x256xf32> { + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %dest : tensor<32x4x32x8xf32> -> tensor<128x256xf32> + return %0 : tensor<128x256xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 4)> +// CHECK: func.func @perfect_CKkc_to_KC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index +// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C128]] step %[[C4]] +// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP1]](%[[C]]) +// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 2, 4] +// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [2, 4] +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] +// CHECK-SAME: into %[[ITER_SLICE]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] +// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4] +// CHECK: scf.yield %[[RES]] +func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128xf32>) -> tensor<8x128xf32> { + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %dest : tensor<32x4x2x4xf32> -> tensor<8x128xf32> + return %0 : tensor<8x128xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 floordiv 2)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 ceildiv 2)> +// CHECK: func.func @dynamic_perfect_CKkc_to_KC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[DIM_0:.+]] = tensor.dim %[[OUT]], %[[C0]] +// CHECK-DAG: %[[DIM_1:.+]] = tensor.dim %[[OUT]], %[[C1]] +// CHECK: %{{.+}} = 
scf.for %[[K:.+]] = %[[C0]] to %[[DIM_0]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[DIM_1]] step %[[C4]] +// CHECK-DAG: %[[OUT_K_SZ:.+]] = affine.min #[[MAP0]](%[[K]])[%[[DIM_0]]] +// CHECK-DAG: %[[OUT_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])[%[[DIM_1]]] +// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP2]](%[[K]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]]) +// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP3]](%[[OUT_C_SZ]]) +// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], 1, 2, 2] +// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] +// CHECK-SAME: into %[[ITER_SLICE]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] +// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] +// CHECK: scf.yield %[[RES]] + +func.func @dynamic_perfect_CKkc_to_KC(%source: tensor, %dest: tensor) -> tensor { + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %dest : tensor -> tensor + return %0 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)> +// CHECK: func.func @perfect_NKPQk_to_NPQK( +// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x4x6x6x2xf32>, +// CHECK-SAME: %{{.+}}: tensor<1x6x6x8xf32>) +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK: %{{.+}} = scf.for %[[P:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[Q:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C4]] +// CHECK: %[[K_SZ:.+]] = affine.apply #[[MAP]](%[[K]]) +// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[K_SZ]], %[[P]], %[[Q]], 0] +// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] +// CHECK-SAME: into %[[SLICE_DEST]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] +// CHECK-SAME: into %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] +// CHECK: scf.yield %[[RES]] + +func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1x6x6x8xf32>) -> tensor<1x6x6x8xf32> { + %0 = linalg.unpack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x4x6x6x2xf32> -> tensor<1x6x6x8xf32> + return %0 : tensor<1x6x6x8xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> 
!transform.any_op + %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +func.func private @get_dynamic_tile_size() -> index + +// CHECK-LABEL: func.func @fully_dynamic_unpack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DST:[0-9a-zA-Z]+]] +// CHECK: %[[INNER_TS:.+]] = call @get_dynamic_tile_size() : () -> index +// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[DST]]) +// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SRC]] +// CHECK: %[[EMPTY:.+]] = tensor.empty +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SLICE]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [%[[INNER_TS]], %[[INNER_TS]]] into %[[EMPTY]] +func.func @fully_dynamic_unpack(%source: tensor, %dest: tensor) -> tensor { + %0 = func.call @get_dynamic_tile_size() : () -> index + %1 = linalg.unpack %source inner_dims_pos = [1, 0] inner_tiles = [%0, %0] into %dest : tensor -> tensor + return %1 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * 2)> +// CHECK: func.func @perfect_NPQK_to_NKPQk +// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x6x6x8xf32>, +// CHECK-SAME: %{{.+}}: tensor<1x4x6x6x2xf32>) +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index +// CHECK: %{{.+}} = scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[ARG4:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[ARG6:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP1]](%[[ARG2]]) +// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[ARG4]], %[[ARG6]], %[[APPLY]]] +// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] +// CHECK: %[[PACK:.+]] = linalg.pack +// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] +// CHECK-SAME: into %[[SLICE_DEST]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[PACK]] +// CHECK-SAME: into %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] +// CHECK: scf.yield %[[RES]] + +func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4x6x6x2xf32>) -> tensor<1x4x6x6x2xf32> { + %0 = linalg.pack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x6x6x8xf32> -> tensor<1x4x6x6x2xf32> + return %0 : tensor<1x4x6x6x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes 
[1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir index 100692426ef44..5812c4db88247 100644 --- a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir +++ b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir @@ -378,11 +378,11 @@ func.func @no_padding_on_packs(%A: tensor<32x32xf32>, %B: tensor<32x32xf32>, %C: } // CHECK-LABEL: no_padding_on_packs -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 4] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 4] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x8x8x4xf32> -// CHECK: tensor.pack %{{.+}} outer_dims_perm = [1, 0] +// CHECK: linalg.pack %{{.+}} outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 16] into %{{.+}} : tensor<32x32xf32> -> tensor<2x8x4x16xf32> -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 16] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 16] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x2x8x16xf32> module attributes {transform.with_named_sequence} { @@ -393,12 +393,12 @@ module attributes {transform.with_named_sequence} { matmul_packed_sizes = [8, 16, 4] matmul_inner_dims_order = [0, 1, 2] : (!transform.op<"linalg.matmul">) -> !transform.op<"linalg.generic"> %pack = transform.get_producer_of_operand %1[1] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">) %2, %pack_2, %empty_unpack_2 = transform.structured.pack_transpose %pack with_compute_op(%1) outer_perm = [1, 0] inner_perm = [1, 0] - : (!transform.op<"tensor.pack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op) + : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op) transform.yield } } diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir index faf7ff9ad7ed0..5d4ae4f15d3fd 100644 --- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir @@ -14,7 +14,7 @@ module { func.func @fuse_pack_as_producer(%src: tensor<128x256xf32>, %other: tensor<4x4x128x256xf32>) -> tensor<4x4x128x256xf32> { %dest = tensor.empty() : tensor<1x1x128x256xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<128x256xf32> -> tensor<1x1x128x256xf32> %out = tensor.empty() : tensor<4x4x128x256xf32> @@ -36,10 +36,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower pack operation. 
- %pack = transform.structured.match ops{["tensor.pack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.pack"> + %pack = transform.structured.match ops{["linalg.pack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.pack"> %paded, %expanded, %transpose = transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false} - : (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) @@ -72,7 +72,7 @@ module { func.func @fuse_pack_as_producer_blocked_by_insert_slice(%src: tensor<128x256xf32>, %other: tensor<4x4x128x256xf32>) -> tensor<4x4x128x256xf32> { %dest = tensor.empty() : tensor<1x1x128x256xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<128x256xf32> -> tensor<1x1x128x256xf32> %out = tensor.empty() : tensor<4x4x128x256xf32> @@ -94,10 +94,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower pack operation. - %pack = transform.structured.match ops{["tensor.pack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.pack"> + %pack = transform.structured.match ops{["linalg.pack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.pack"> %paded, %expanded, %transpose = transform.structured.lower_pack %pack - : (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) @@ -143,7 +143,7 @@ module { } -> tensor<1x1x128x256xf32> %dest = tensor.empty() : tensor<128x256xf32> - %unpack = tensor.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %unpack = linalg.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<1x1x128x256xf32> -> tensor<128x256xf32> return %unpack : tensor<128x256xf32> @@ -152,10 +152,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower unpack operation. - %unpack = transform.structured.match ops{["tensor.unpack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.unpack"> + %unpack = transform.structured.match ops{["linalg.unpack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.unpack"> transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false} - : (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -204,7 +204,7 @@ module { } -> tensor<1x1x128x256xf32> %dest = tensor.empty() : tensor<128x256xf32> - %unpack = tensor.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %unpack = linalg.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<1x1x128x256xf32> -> tensor<128x256xf32> return %unpack : tensor<128x256xf32> @@ -213,10 +213,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower unpack operation. 
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.unpack"> + %unpack = transform.structured.match ops{["linalg.unpack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.unpack"> transform.structured.lower_unpack %unpack - : (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir index 8fbc74ec345c6..8f3b199145ce0 100644 --- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir @@ -115,13 +115,13 @@ module attributes {transform.with_named_sequence} { func.func @test_pack_no_vectorize_dynamic_shape(%arg0: tensor, %arg1: tensor<4x16xf32>) -> tensor<4x16xf32> { %pad = arith.constant 0.000000e+00 : f32 // expected-error @+1 {{Attempted to vectorize, but failed}} - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor -> tensor<4x16xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor -> tensor<4x16xf32> return %pack : tensor<4x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir index 5ae3f893c2e73..9f2ee47b45b3e 100644 --- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir @@ -1944,13 +1944,13 @@ module attributes {transform.with_named_sequence} { // masking was used. 
func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { - %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> return %pack : tensor<4x1x32x16x2xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op transform.yield @@ -1977,7 +1977,7 @@ module attributes {transform.with_named_sequence} { func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { %pad = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> return %pack : tensor<32x4x1x16x2xf32> } @@ -1995,7 +1995,7 @@ func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor< module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op transform.yield diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 6d39262945de5..c6d9ec6215715 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -671,7 +671,7 @@ module attributes {transform.with_named_sequence} { // masking was used. 
func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { - %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> return %pack : tensor<4x1x32x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -688,7 +688,7 @@ func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [4, 1, 32] : !transform.any_op transform.yield } @@ -702,7 +702,7 @@ module attributes {transform.with_named_sequence} { func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { %pad = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> return %pack : tensor<32x4x1x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -725,7 +725,7 @@ func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor< module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [32, 4, 1] : !transform.any_op transform.yield } @@ -734,7 +734,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @test_vectorize_dynamic_pack(%arg0: tensor, %arg1: tensor) -> tensor { - %pack = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg1 : tensor -> tensor + %pack = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg1 : tensor -> tensor return %pack : tensor } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -766,7 +766,7 @@ func.func @test_vectorize_dynamic_pack(%arg0: tensor, %arg1: tensor !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [4, 1] : !transform.any_op transform.yield } @@ -893,12 +893,12 @@ func.func @test_vectorize_dynamic_shapes_unpack(%arg0: tensor, %arg1: t // CHECK: %[[writeMsk0:.*]] = vector.create_mask {{.*}} : vector<4x16xi1> // CHECK: %[[write0:.*]] = vector.mask %[[writeMsk0:.*]] {{.*}} vector.transfer_write %[[sc0]], %[[empt0]] // CHECK: return %[[write0]] - %ret = tensor.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor -> tensor + %ret = linalg.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles 
= [16, 2] into %arg0 : tensor -> tensor return %ret : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [4, 16] : !transform.any_op transform.yield } @@ -925,12 +925,12 @@ func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<2 // CHECK: %[[WRITEMSK:.*]] = vector.create_mask %[[C256]], %[[C128]] : vector<512x128xi1> // CHECK: %[[WRIT:.*]] = vector.mask %[[WRITEMSK]] {{.*}} : vector<512x128xi1> -> tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [512, 128] : !transform.any_op transform.yield } @@ -949,12 +949,12 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: // CHECK: %[[C00:.*]] = arith.constant 0 : index // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op transform.yield } @@ -973,12 +973,12 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: // CHECK: %[[C00:.*]] = arith.constant 0 : index // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} 
in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op transform.yield } @@ -988,7 +988,7 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: // CHECK-LABEL: test_vectorize_pack_no_vector_sizes func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> { - %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32> return %pack : tensor<2x4x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -1005,7 +1005,7 @@ func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: t module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1016,7 +1016,7 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { %pad = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> return %pack : tensor<32x4x1x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -1033,7 +1033,7 @@ func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1051,12 +1051,12 @@ func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>, // CHECK: %[[C00:.*]] = arith.constant 0 : index // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match 
ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1075,12 +1075,12 @@ func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[EMPT]]{{\[}}%[[C00]], %[[C00]]] // CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32> // CHECK: return %[[WRIT]] : tensor<64x127xf32> - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32> + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32> return %0 : tensor<64x127xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1089,7 +1089,7 @@ func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x // ----- func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf32>, %dest: tensor<7x16xf32>) -> tensor<7x16xf32> { - %0 = tensor.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32> + %0 = linalg.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32> return %0 : tensor<7x16xf32> } // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 @@ -1103,7 +1103,7 @@ func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf // CHECK: return %[[WRIT]] : tensor<7x16xf32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 01d14871072cd..90cc0ca658ffb 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -899,225 +899,6 @@ func.func @fold_extract_constant_splat() -> (tensor<4x4xi32>) { // ----- -// CHECK-LABEL: func @fold_pack_constant_splat -// CHECK-NOT: tensor.pack -// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> -func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { - %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32> - %0 = tensor.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> -} - -// ----- - -// CHECK-LABEL: func @fold_padding_value_pack_constant_splat -// CHECK-NOT: tensor.pack -// CHECK: arith.constant dense<1.000000e-01> : 
tensor<8x16x8x32xf32> -func.func @fold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { - %pad = arith.constant 1.000000e-01 : f32 - %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> - %0 = tensor.pack %cst - padding_value(%pad : f32) - outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> -} - - -// ----- - -// CHECK-LABEL: func @nofold_padding_value_pack_constant_splat -// CHECK: arith.constant dense<1.000000e-01> : tensor<63x127xf32> -// CHECK: tensor.pack -func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { - %pad = arith.constant 0.0 : f32 - %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> - %0 = tensor.pack %cst - padding_value(%pad : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [0, 1] - inner_tiles = [8, 32] - into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> -} - -// ----- - -func.func @fold_padding_value_pack(%arg0: tensor<1200x500000xf32>) -> tensor<31250x1200x16x1xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty() : tensor<31250x1200x16x1xf32> - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [16, 1] - into %0 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> - return %pack : tensor<31250x1200x16x1xf32> -} -// CHECK-LABEL: func @fold_padding_value_pack -// CHECK-NOT: padding_value - -// ----- - -func.func @infer_src_shape_pack(%src: tensor, %dest: tensor<10x20x30x40x16xf32>) -> tensor<10x20x30x40x16xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %src - padding_value(%cst : f32) - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor -> tensor<10x20x30x40x16xf32> - return %pack : tensor<10x20x30x40x16xf32> -} -// CHECK-LABEL: func.func @infer_src_shape_pack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor to tensor<40x20x?x30xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[CAST_SRC]] {{.+}} into %[[DEST]] -// CHECK: return %[[PACK]] - -// ----- - -func.func @infer_dest_shape_pack(%src: tensor<30x20x?x10xf32>, %dest: tensor) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %src - padding_value(%cst : f32) - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor<30x20x?x10xf32> -> tensor - return %pack : tensor -} -// CHECK-LABEL: func.func @infer_dest_shape_pack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor to tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] {{.+}} into %[[CAST_DEST]] -// CHECK: %[[CAST_PACK:.+]] = tensor.cast %[[PACK]] : tensor to tensor -// CHECK: return %[[CAST_PACK]] - -// ----- - -func.func @no_infer_pack_shape(%arg0: tensor, %arg1: index) -> tensor<32x7x?x16x1xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty(%arg1) : tensor<32x7x?x16x1xf32> - %pack = tensor.pack %arg0 padding_value(%cst : f32) outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor -> tensor<32x7x?x16x1xf32> - return %pack : tensor<32x7x?x16x1xf32> -} -// CHECK-LABEL: func.func @no_infer_pack_shape -// CHECK-NOT: tensor.cast - -// ----- 
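// The canonicalization patterns exercised by the tests removed in this hunk
// move to the linalg dialect together with the ops. As a sketch (assuming the
// tests are re-homed next to linalg.pack and linalg.unpack, which this hunk
// does not show), the splat-folding case above carries over verbatim modulo
// the dialect prefix:
//
//   // CHECK-LABEL: func @fold_pack_constant_splat
//   // CHECK-NOT: linalg.pack
//   // CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32>
//   func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
//     %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32>
//     %0 = linalg.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
//       inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32>
//     return %0 : tensor<8x16x8x32xf32>
//   }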
- -func.func @fold_padding_value_pack_negative1(%arg0: tensor<1200x499999xf32>) -> tensor<31250x1200x16x1xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty() : tensor<31250x1200x16x1xf32> - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [16, 1] - into %0 : tensor<1200x499999xf32> -> tensor<31250x1200x16x1xf32> - return %pack : tensor<31250x1200x16x1xf32> -} -// CHECK-LABEL: func @fold_padding_value_pack_negative1 -// CHECK: tensor.pack -// CHECK-SAME: padding_value - -// ----- - -func.func @fold_padding_value_pack_negative2(%arg0: tensor<1200x?xf32>, %arg1: tensor) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [16, 1] - into %arg1 : tensor<1200x?xf32> -> tensor - return %pack : tensor -} -// CHECK-LABEL: func @fold_padding_value_pack_negative2 -// CHECK: tensor.pack -// CHECK-SAME: padding_value - -// ----- - -func.func @fold_padding_value_pack_negative3(%arg0: tensor<1200x500000xf32>, %arg1: tensor, %tile : index) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [%tile, 1] - into %arg1 : tensor<1200x500000xf32> -> tensor - return %pack : tensor -} -// CHECK-LABEL: func @fold_padding_value_pack_negative3 -// CHECK: tensor.pack -// CHECK-SAME: padding_value - -// ----- - -// CHECK-LABEL: func @fold_unpack_constant_splat -// CHECK-NOT: tensor.unpack -// CHECK: arith.constant dense<1.000000e-01> : tensor<128x256xf32> -func.func @fold_unpack_constant_splat(%dest : tensor<128x256xf32>) -> tensor<128x256xf32> { - %cst = arith.constant dense<1.000000e-01> : tensor<16x8x8x32xf32> - %0 = tensor.unpack %cst inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32> - return %0 : tensor<128x256xf32> -} - -// ----- - -func.func @infer_dest_shape_unpack(%src: tensor<10x20x30x40x16xf32>, %dest: tensor) -> tensor { - %unpack = tensor.unpack %src - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor<10x20x30x40x16xf32> -> tensor - return %unpack : tensor -} -// CHECK-LABEL: func.func @infer_dest_shape_unpack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor to tensor<40x20x?x30xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SRC]] {{.+}} into %[[CAST_DEST]] -// CHECK: %[[CAST_UNPACK:.+]] = tensor.cast %[[UNPACK]] : tensor<40x20x?x30xf32> to tensor -// CHECK: return %[[CAST_UNPACK]] - -// ----- - -func.func @infer_src_shape_unpack(%src: tensor, %dest: tensor<30x20x?x10xf32>) -> tensor<30x20x?x10xf32> { - %unpack = tensor.unpack %src - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor -> tensor<30x20x?x10xf32> - return %unpack : tensor<30x20x?x10xf32> -} -// CHECK-LABEL: func.func @infer_src_shape_unpack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor to tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[CAST_SRC]] -// CHECK: return %[[UNPACK]] - -// ----- - -func.func @no_infer_unpack_shape(%arg1: tensor<32x7x?x16x1xf32>, %arg2: index) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty(%arg2) : tensor - %unpack = 
tensor.unpack %arg1 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<32x7x?x16x1xf32> -> tensor - return %unpack : tensor -} -// CHECK-LABEL: func.func @no_infer_unpack_shape -// CHECK-NOT: tensor.cast - -// ----- - - // CHECK-LABEL: func @fold_overlapping_insert // CHECK-SAME: %[[INPUT:.+]]: tensor, %{{.+}}: tensor<4x?x8xf32>, %[[SLICE2:.+]]: tensor<4x?x8xf32> func.func @fold_overlapping_insert(%input : tensor, %slice1: tensor<4x?x8xf32>, %slice2: tensor<4x?x8xf32>, %i: index, %size: index) -> (tensor) { @@ -2370,174 +2151,6 @@ func.func @collapse_expand_fold_to_cast(%t: tensor, %sz0: index) -> (tens // ----- -// Chain: NC -> NCnc -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) -// CHECK: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// Chain: NC -> NCcn -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) -// CHECK-NOT: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %t inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// Chain: NC -> CNcn -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) -// CHECK-NOT: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %t outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// Chain: NC -> NCnc -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>, -// CHECK: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>, %tile1: index, %tile2: index) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// 
CHECK: func.func @unpack_pack_with_padding_no_canonicalization( -// CHECK: tensor.pack -// CHECK: tensor.unpack -func.func @unpack_pack_with_padding_no_canonicalization(%t: tensor<256x512xbf16>) -> tensor<224x512xbf16> { - %tensor_empty = tensor.empty() : tensor<4x16x64x32xbf16> - %tensor_empty1 = tensor.empty() : tensor<224x512xbf16> - %packed = tensor.pack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty : tensor<256x512xbf16> -> tensor<4x16x64x32xbf16> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16> - return %unpacked : tensor<224x512xbf16> -} - -// ----- - -// Chain NCnc -> NC -> NC -> NCnc -// CHECK: func.func @pack_unpack( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, -// CHECK: return %[[T]] : tensor<16x16x?x?xf32> -func.func @pack_unpack(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - return %packed : tensor<16x16x?x?xf32> -} - -// ----- - -// Chain NCnc -> NC -> NC -> NCnc -// CHECK: func.func @pack_unpack( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x8x8xf32> -// CHECK: return %[[T]] : tensor<16x16x8x8xf32> -func.func @pack_unpack(%t: tensor<16x16x8x8xf32>) -> tensor<16x16x8x8xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - return %packed : tensor<16x16x8x8xf32> -} - -// ----- - -// CHECK: func.func @pack_unpack_same_tiles( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK: return %[[T]] : tensor -func.func @pack_unpack_same_tiles(%t: tensor, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, - %tile1: index, %tile2: index) -> tensor { - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor -> tensor - %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor -> tensor - return %packed : tensor -} - -// ----- - -// CHECK: func.func @pack_unpack_different_tiles( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-NOT: return %[[T]] : tensor -func.func @pack_unpack_different_tiles(%t: tensor, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, - %tile1: index, %tile2: index) -> tensor { - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor -> tensor - %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile2, %tile1] into 
%tensor_empty1 : tensor -> tensor - return %packed : tensor -} - -// ----- - -// CHECK: func.func @pack_unpack_dynamic_with_padding( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-NOT: return %[[T]] : tensor -func.func @pack_unpack_dynamic_with_padding(%t: tensor, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, - %tile1: index, %tile2: index, %pad: f32) -> tensor { - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor -> tensor - %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor - %packed = tensor.pack %unpacked padding_value(%pad: f32) inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor -> tensor - return %packed : tensor -} - -// ----- - -// CHECK: func.func @pack_outer_dims_unpack_no_outer_dims( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, -// CHECK: return %[[T]] : tensor<16x16x?x?xf32> -func.func @pack_outer_dims_unpack_no_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %unpacked outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - return %packed : tensor<16x16x?x?xf32> -} - -// ----- - -// CHECK: func.func @pack_no_outer_dims_unpack_outer_dims( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, -// CHECK: return %[[T]] : tensor<16x16x?x?xf32> -func.func @pack_no_outer_dims_unpack_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - return %packed : tensor<16x16x?x?xf32> -} - -// ----- - // CHECK: func.func @invalid_empty_negative_size // CHECK: %[[IDX:.*]] = index.constant // CHECK: %[[T:.*]] = tensor.empty(%[[IDX]]) : tensor<4x5x?xf32> @@ -2551,22 +2164,6 @@ func.func @invalid_empty_negative_size() -> (tensor<4x5x?xf32>) { // ----- -// Fold DstStyleOp -> tensor.unpack operations. -func.func @fold_dst_style_ops_into_unpack(%arg0 : tensor, %init : tensor) -> tensor { - %cst = arith.constant 0.0 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor - %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %fill : tensor -> tensor - return %unpack : tensor -} -// CHECK-LABEL: func @fold_dst_style_ops_into_unpack -// CHECK-SAME: %[[ARG0:.+]]: tensor -// CHECK-SAME: %[[INIT:.+]]: tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: into %[[INIT]] -// CHECK: return %[[UNPACK]] - -// ----- - // The IR in this test case is invalid. This test checks that the canonicalizer // does not crash. 
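// The unpack(pack(x)) and pack(unpack(x)) round-trip folds removed in the
// hunks above are likewise expected to survive the move as linalg
// canonicalizations. A minimal sketch (assuming the test is re-homed with the
// linalg ops; the destination file is not part of this diff): packing and
// immediately unpacking with matching inner_dims_pos and inner_tiles still
// folds to the original value.
//
//   // CHECK: func.func @unpack_pack(
//   // CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
//   // CHECK: return %[[T]] : tensor<128x128xf32>
//   func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
//     %e0 = tensor.empty() : tensor<16x16x8x8xf32>
//     %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %e0 : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
//     %e1 = tensor.empty() : tensor<128x128xf32>
//     %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %e1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
//     return %unpacked : tensor<128x128xf32>
//   }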
@@ -2598,21 +2195,6 @@ func.func @generate_negative_size_verifies() -> tensor { return %tensor : tensor } -// ----- - -func.func @infer_and_fold_pack_unpack_same_tiles(%t: tensor<10x20x4x4xf32>) -> tensor<10x20x4x4xf32> { - %dim1 = arith.constant 40 : index - %dim2 = arith.constant 80 : index - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty : tensor<10x20x4x4xf32> -> tensor - %cast = tensor.cast %unpacked : tensor to tensor<40x80xf32> - %tensor_empty1 = tensor.empty() : tensor<10x20x4x4xf32> - %packed = tensor.pack %cast inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty1 : tensor<40x80xf32> -> tensor<10x20x4x4xf32> - return %packed : tensor<10x20x4x4xf32> -} -// CHECK-LABEL: func.func @infer_and_fold_pack_unpack_same_tiles -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK: return %[[SRC]] // ----- @@ -2787,62 +2369,6 @@ func.func @fold_cast_multiple_results(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2x return %0#1 : index } -// ----- - -// CHECK-LABEL: func.func @fold_cast_pack_dynamic_tile_size -// CHECK-SAME: %[[DEST:.*]]: tensor<1x1x8x1xi32>, -// CHECK-SAME: %[[SRC:.*]]: tensor<7x?xi32>, -// CHECK-SAME: %[[PAD:.*]]: i32) -> tensor<1x1x8x1xi32> { -// CHECK: %[[PACK:.*]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : i32) -// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] -// CHECK-SAME: test_attr -// CHECK-SAME: : tensor<7x?xi32> -> tensor<1x1x8x1xi32> -// CHECK: return %[[PACK]] : tensor<1x1x8x1xi32> -func.func @fold_cast_pack_dynamic_tile_size( - %dest: tensor<1x1x8x1xi32>, - %src: tensor<7x?xi32>, - %pad: i32) -> tensor<1x1x8x1xi32> { - - %cast = tensor.cast %dest : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> - %c8 = arith.constant 8 : index - %pack = tensor.pack %src padding_value(%pad : i32) - inner_dims_pos = [0, 1] - inner_tiles = [%c8, 1] - into %cast {test_attr} : tensor<7x?xi32> -> tensor<1x1x?x1xi32> - %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32> - return %res : tensor<1x1x8x1xi32> -} - -// ----- - -// CHECK-LABEL: func.func @fold_cast_unpack_dynamic_tile_size( -// CHECK-SAME: %[[SRC:.*]]: tensor<1x1x8x1xi32>, -// CHECK-SAME: %[[DEST:.*]]: tensor<7x?xi32>) -> tensor<7x?xi32> { -// CHECK: %[[RES:.*]] = tensor.unpack %[[SRC]] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] {test_attr} : tensor<1x1x8x1xi32> -> tensor<7x?xi32> -// CHECK: return %[[RES]] : tensor<7x?xi32> -func.func @fold_cast_unpack_dynamic_tile_size( - %src: tensor<1x1x8x1xi32>, - %res: tensor<7x?xi32>) -> tensor<7x?xi32> { - - %cast = tensor.cast %src : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> - %c8 = arith.constant 8 : index - %unpack = tensor.unpack %cast - inner_dims_pos = [0, 1] - inner_tiles = [%c8, 1] - into %res {test_attr} : tensor<1x1x?x1xi32> -> tensor<7x?xi32> - return %unpack : tensor<7x?xi32> -} - -// ----- - -// CHECK-LABEL: func.func @pack_dont_drop_attributes( -// CHECK: tensor.pack {{.*}} {test_attr} -func.func @pack_dont_drop_attributes(%arg0: tensor, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> { - %c32_i64 = arith.constant 32 : i64 - %cst = arith.constant 0.000000e+00 : f16 - %pack = tensor.pack %arg0 padding_value(%cst : f16) outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %arg1 {test_attr} : tensor -> tensor<128x?x100x16x1xf16> - return %pack : tensor<128x?x100x16x1xf16> -} // ----- diff --git a/mlir/test/Dialect/Tensor/fold-empty-op.mlir 
b/mlir/test/Dialect/Tensor/fold-empty-op.mlir index 850bbcee34020..7b11c9f43c7ec 100644 --- a/mlir/test/Dialect/Tensor/fold-empty-op.mlir +++ b/mlir/test/Dialect/Tensor/fold-empty-op.mlir @@ -61,77 +61,6 @@ func.func @rank_reducing_empty_tensor_extract(%sz : index, %idx : index) -> tens return %r: tensor<2xf32> } -func.func @pack_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { - %empty_unpacked = tensor.empty() : tensor<256x256xf32> - %packed = tensor.pack %empty_unpacked - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> - return %packed : tensor<8x8x32x32xf32> -} - -// CHECK-LABEL: func.func @pack_empty( -// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> -// CHECK-NOT: tensor.pack -// CHECK: return %[[T]] : tensor<8x8x32x32xf32> - -func.func @pack_empty_dynamic(%arg0: tensor, %dim0: index, %dim1: index) -> tensor { - %empty_unpacked = tensor.empty(%dim0, %dim1) : tensor - %packed = tensor.pack %empty_unpacked - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor -> tensor - return %packed : tensor -} - -// CHECK-LABEL: func.func @pack_empty_dynamic( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index, -// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index -// CHECK-NOT: tensor.pack -// CHECK: return %[[T]] : tensor - -func.func @unpack_empty(%arg0: tensor<256x256xf32>) -> tensor<256x256xf32> { - %empty_packed = tensor.empty() : tensor<8x8x32x32xf32> - %unpacked = tensor.unpack %empty_packed - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor<8x8x32x32xf32> -> tensor<256x256xf32> - return %unpacked : tensor<256x256xf32> -} - -// CHECK-LABEL: func.func @unpack_empty( -// CHECK-SAME: %[[T:.+]]: tensor<256x256xf32> -// CHECK-NOT: tensor.unpack -// CHECK: return %[[T]] : tensor<256x256xf32> - -func.func @unpack_empty_dynamic(%arg0: tensor, %dim0: index, %dim1: index) -> tensor { - %empty_packed = tensor.empty(%dim0, %dim1) : tensor - %unpacked = tensor.unpack %empty_packed - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor -> tensor - return %unpacked : tensor -} - -// CHECK-LABEL: func.func @unpack_empty_dynamic( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index, -// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index -// CHECK-NOT: tensor.unpack -// CHECK: return %[[T]] : tensor - -func.func @pack_padded_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { - %pad = arith.constant 1.0 : f32 - %empty_unpacked = tensor.empty() : tensor<256x256xf32> - %packed = tensor.pack %empty_unpacked - padding_value(%pad : f32) - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> - return %packed : tensor<8x8x32x32xf32> -} - -// CHECK-LABEL: func.func @pack_padded_empty( -// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack -// CHECK: return %[[PACK]] : tensor<8x8x32x32xf32> - // ----- module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir index bff913f5f55fe..84eb60248b8be 100644 --- a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir +++ b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt -split-input-file -test-tensor-transform-patterns=test-fold-into-pack-and-unpack %s | FileCheck %s +// RUN: mlir-opt -split-input-file 
-test-linalg-transform-patterns=test-fold-into-pack-and-unpack %s | FileCheck %s func.func @fold_unpack_slice(%arg0 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, 0] [%arg2, %arg3] [1, 1] : tensor to tensor return %1 : tensor @@ -13,7 +13,7 @@ func.func @fold_unpack_slice(%arg0 : tensor, %arg1 : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 4] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 4] // CHECK-SAME: into %[[INIT]] // CHECK: return %[[UNPACK]] @@ -21,39 +21,39 @@ func.func @fold_unpack_slice(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index, %arg4 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, %arg4] [%arg2, %arg3] [1, 1] : tensor to tensor return %1 : tensor } // CHECK-LABEL: func @nofold_unpack_slice_non_zero_offset( -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: tensor.extract_slice %[[UNPACK]] // ----- func.func @nofold_unpack_slice_non_unit_stride(%arg0 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index, %arg4 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, 0] [%arg2, %arg3] [%arg4, 1] : tensor to tensor return %1 : tensor } // CHECK-LABEL: func @nofold_unpack_slice_non_unit_stride( -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: tensor.extract_slice %[[UNPACK]] // ----- func.func @nofold_unpack_slice_rank_reduced(%arg0 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, 0] [1, 1] [1, 1] : tensor to tensor return %1 : tensor } // CHECK-LABEL: func @nofold_unpack_slice_rank_reduced( -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: tensor.extract_slice %[[UNPACK]] // ----- @@ -66,7 +66,7 @@ func.func @pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32> { tensor.yield %cst : f32 } : tensor<16641x16xf32> to tensor<16656x16xf32> %empty = tensor.empty() : tensor<2082x1x8x32xf32> - %pack = tensor.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty + %pack = linalg.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty : tensor<16656x16xf32> -> tensor<2082x1x8x32xf32> return %pack : tensor<2082x1x8x32xf32> } @@ -74,7 +74,7 @@ func.func @pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32> { // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]] // CHECK: %[[PAD_VAL:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<2082x1x8x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] // CHECK-SAME: 
padding_value(%[[PAD_VAL]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %[[DEST]] @@ -88,13 +88,13 @@ func.func @nofold_pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32 tensor.yield %cst : f32 } : tensor<16641x16xf32> to tensor<16656x16xf32> %empty = tensor.empty() : tensor<2082x1x8x32xf32> - %pack = tensor.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty + %pack = linalg.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty : tensor<16656x16xf32> -> tensor<2082x1x8x32xf32> return %pack : tensor<2082x1x8x32xf32> } // CHECK-LABEL: func.func @nofold_pad_pack // CHECK: tensor.pad -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -107,19 +107,19 @@ func.func @pad_pack_different_padding_value(%src: tensor<16641x16xf32>) -> tenso tensor.yield %cst0 : f32 } : tensor<16641x16xf32> to tensor<16656x16xf32> %empty = tensor.empty() : tensor<2082x1x8x32xf32> - %pack = tensor.pack %padded padding_value(%cst1 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty + %pack = linalg.pack %padded padding_value(%cst1 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty : tensor<16656x16xf32> -> tensor<2082x1x8x32xf32> return %pack : tensor<2082x1x8x32xf32> } // CHECK-LABEL: func.func @pad_pack_different_padding_value // CHECK: tensor.pad -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- -func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { +func.func @linalg.pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<56x2x1x57x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [3] inner_tiles = [32] @@ -132,10 +132,10 @@ func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> t permutation = [2, 3, 0, 1, 4] return %transposed : tensor<1x57x56x2x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold( +// CHECK: func @linalg.pack_linalg_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -143,9 +143,9 @@ func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> t // ----- -func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { +func.func @linalg.pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<56x2x1x57x32xf32> - %pack = tensor.pack %arg0 padding_value(%padding : f32) + %pack = linalg.pack %arg0 padding_value(%padding : f32) outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [3] inner_tiles = [32] @@ -158,10 +158,10 @@ func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x permutation = [2, 3, 0, 1, 4] return %transposed : tensor<1x57x56x2x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_with_padding( +// CHECK: func @linalg.pack_linalg_transpose_fold_with_padding( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x55xf32>, %[[PADDING:.+]]: f32) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: 
%[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -169,9 +169,9 @@ func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x // ----- -func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x2x56x57x32xf32> { +func.func @linalg_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x2x56x57x32xf32> { %0 = tensor.empty() : tensor<56x57x1x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<56x57x1x64xf32> -> tensor<56x57x1x2x32xf32> @@ -183,10 +183,10 @@ func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56 permutation = [2, 3, 0, 1, 4] return %transposed : tensor<1x2x56x57x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm( +// CHECK: func @linalg_pack_linalg_transpose_fold_no_outer_dims_perm( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x2x56x57x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 3, 0, 1] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -194,9 +194,9 @@ func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56 // ----- -func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<12x56x4x9x32x8x2xf32> { +func.func @linalg_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<12x56x4x9x32x8x2xf32> { %0 = tensor.empty() : tensor<4x9x12x56x8x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [3, 1, 2, 0] inner_dims_pos = [1, 2, 3] inner_tiles = [8, 2, 32] @@ -209,10 +209,10 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<5 permutation = [2, 3, 0, 1, 6, 4, 5] return %transposed : tensor<12x56x4x9x32x8x2xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_transpose( +// CHECK: func @linalg_pack_linalg_transpose_fold_tile_dims_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<12x56x4x9x32x8x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 0, 3, 1] // CHECK-SAME: inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] // CHECK-SAME: into %[[INIT]] @@ -220,9 +220,9 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<5 // ----- -func.func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<9x56x2x12x32x8x4xf32> { +func.func @linalg_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<9x56x2x12x32x8x4xf32> { %0 = tensor.empty() : tensor<4x12x9x56x8x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [3, 2, 1, 0] inner_dims_pos = [1, 2, 3] inner_tiles = [8, 2, 32] @@ -235,16 +235,16 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg permutation = [2, 3, 5, 1, 6, 4, 0] return %transposed :
tensor<9x56x2x12x32x8x4xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose( +// CHECK: func @linalg_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>) -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: linalg.transpose // ----- -func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56x?x?x64xf32>) -> tensor<?x?x56x2x32xf32> { +func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56x?x?x64xf32>) -> tensor<?x?x56x2x32xf32> { %0 = tensor.empty() : tensor<56x2x1x57x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [3] inner_tiles = [32] @@ -259,14 +259,14 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56 %return_value = tensor.cast %transposed : tensor<1x57x56x2x32xf32> to tensor<?x?x56x2x32xf32> return %return_value : tensor<?x?x56x2x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims( +// CHECK: func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x64xf32>) // CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index // CHECK: %[[dim:.+]] = tensor.dim %[[ARG0]], %[[c1]] : tensor<56x?x?x64xf32> // CHECK: %[[dim_0:.+]] = tensor.dim %[[ARG0]], %[[c2]] : tensor<56x?x?x64xf32> // CHECK: %[[INIT:.+]] = tensor.empty(%[[dim_0]], %[[dim]]) : tensor<?x?x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -274,9 +274,9 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56 // ----- -func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: tensor<56x?x?x128xf32>) -> tensor<?x?x56x9x32x8x2xf32> { +func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: tensor<56x?x?x128xf32>) -> tensor<?x?x56x9x32x8x2xf32> { %0 = tensor.empty() : tensor<56x9x12x4x8x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 inner_dims_pos = [1, 2, 3] inner_tiles = [8, 2, 32] into %0 : tensor<56x?x?x128xf32> -> tensor<56x9x12x4x8x2x32xf32> @@ -292,7 +292,7 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: } // CHECK-DAG: #[[$MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> // CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> -// CHECK-LABEL: func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims( +// CHECK-LABEL: func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x128xf32>) // CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index @@ -301,15 +301,15 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: // CHECK: %[[mapped_dim1:.+]] = affine.apply #[[$MAP0]]()[%[[dim]]] // CHECK: %[[mapped_dim2:.+]] = affine.apply #[[$MAP1]]()[%[[dim_0]]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[mapped_dim2]], %[[mapped_dim1]]) : tensor<?x4x56x?x32x8x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [2, 3, 0, 1] inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] into %[[INIT]] : tensor<56x?x?x128xf32> -> tensor<?x4x56x?x32x8x2xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [2, 3, 0, 1] inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] into %[[INIT]] : tensor<56x?x?x128xf32> -> tensor<?x4x56x?x32x8x2xf32> // CHECK: %[[CAST:.+]] = tensor.cast
%[[PACK]] : tensor<?x4x56x?x32x8x2xf32> to tensor<?x?x56x9x32x8x2xf32> // CHECK: return %[[CAST]] : tensor<?x?x56x9x32x8x2xf32> // CHECK: } // ----- -func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { - %pack = tensor.pack %arg0 +func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { + %pack = linalg.pack %arg0 outer_dims_perm = [3, 0, 2, 1] inner_dims_pos = [1, 2, 3] inner_tiles = [%tile_p, %tile_q, %tile_r] @@ -324,7 +324,7 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_s } // CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)> // CHECK: module { -// CHECK: func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes( +// CHECK: func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, // CHECK-SAME: %[[PACK_DEST:.+]]: tensor<?x?x?x?x?x?x?xf32>, %[[TRANSPOSE_DEST:.+]]: tensor<?x?x?x?x?x?x?xf32>, // CHECK-SAME: %[[ARG1:.+]]: index, %[[ARG2:.+]]: index, @@ -341,13 +341,13 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_s // CHECK: %[[mapped_dim1:.+]] = affine.apply #[[$MAP]]()[%[[dim_0]], %[[ARG1]]] // CHECK: %[[mapped_dim2:.+]] = affine.apply #[[$MAP]]()[%[[dim_1]], %[[ARG2]]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[mapped_dim2]], %[[mapped_dim1]], %[[mapped_dim0]], %[[dim]], %[[ARG3]], %[[ARG1]], %[[ARG2]]) : tensor<?x?x?x?x?x?x?xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1, 2] inner_tiles = [%[[ARG3]], %[[ARG1]], %[[ARG2]]] into %[[INIT]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1, 2] inner_tiles = [%[[ARG3]], %[[ARG1]], %[[ARG2]]] into %[[INIT]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> // CHECK: return %[[PACK]] : tensor<?x?x?x?x?x?x?xf32> // CHECK: } // ----- -func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { +func.func @linalg_transpose_linalg_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<1x56x57x64xf32> %transposed = linalg.transpose ins(%arg0 : tensor<56x57x1x64xf32>) @@ -355,17 +355,17 @@ func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> t permutation = [2, 0, 1, 3] %1 = tensor.empty() : tensor<1x57x56x2x32xf32> - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed outer_dims_perm = [0, 2, 1, 3] inner_dims_pos = [3] inner_tiles = [32] into %1 : tensor<1x56x57x64xf32> -> tensor<1x57x56x2x32xf32> return %pack : tensor<1x57x56x2x32xf32> } -//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold( +//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -373,7 +373,7 @@ func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> t // ----- -func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { +func.func
@linalg_transpose_linalg_pack_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<1x56x57x55xf32> %transpose = linalg.transpose ins(%arg0 : tensor<56x57x1x55xf32>) @@ -381,17 +381,17 @@ func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x permutation = [2, 0, 1, 3] %1 = tensor.empty() : tensor<1x57x56x2x32xf32> - %pack = tensor.pack %transpose padding_value(%padding : f32) + %pack = linalg.pack %transpose padding_value(%padding : f32) outer_dims_perm = [0, 2, 1, 3] inner_dims_pos = [3] inner_tiles = [32] into %1 : tensor<1x56x57x55xf32> -> tensor<1x57x56x2x32xf32> return %pack : tensor<1x57x56x2x32xf32> } -//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold_with_padding( +//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold_with_padding( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x55xf32>, %[[PADDING:.+]]: f32) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -399,7 +399,7 @@ func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x // ----- -func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x56x57x2x32xf32> { +func.func @linalg_transpose_linalg_pack_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x56x57x2x32xf32> { %0 = tensor.empty() : tensor<1x56x57x64xf32> %transposed = linalg.transpose ins(%arg0 : tensor<56x57x1x64xf32>) @@ -407,16 +407,16 @@ func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56 permutation = [2, 0, 1, 3] %1 = tensor.empty() : tensor<1x56x57x2x32xf32> - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed inner_dims_pos = [3] inner_tiles = [32] into %1 : tensor<1x56x57x64xf32> -> tensor<1x56x57x2x32xf32> return %pack : tensor<1x56x57x2x32xf32> } -//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm( +//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold_no_outer_dims_perm( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x56x57x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 0, 1, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -424,25 +424,25 @@ func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56 // ----- -func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change(%arg0: tensor<25x30x35x40xf32>, %transpose_dest: tensor<35x40x25x30xf32>, %pack_dest: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> { +func.func @linalg_transpose_linalg_pack_fold_complex_inner_dims_change(%arg0: tensor<25x30x35x40xf32>, %transpose_dest: tensor<35x40x25x30xf32>, %pack_dest: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> { %transposed = linalg.transpose ins(%arg0 : tensor<25x30x35x40xf32>) outs(%transpose_dest : tensor<35x40x25x30xf32>) permutation = [2, 3, 0, 1] - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed outer_dims_perm = [3, 0, 2, 1] inner_dims_pos = [1, 3, 2] inner_tiles = [5, 10, 5] into %pack_dest : tensor<35x40x25x30xf32> ->
tensor<3x35x5x8x5x10x5xf32> return %pack : tensor<3x35x5x8x5x10x5xf32> } -//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change( +//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_fold_complex_inner_dims_change( // CHECK-SAME: %[[ARG0:.+]]: tensor<25x30x35x40xf32>, // CHECK-SAME: %[[ARG1:.+]]: tensor<35x40x25x30xf32>, // CHECK-SAME: %[[ARG2:.+]]: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> { // CHECK: %[[VAL0:.+]] = tensor.empty() : tensor<3x35x5x8x5x10x5xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 2, 0, 3] // CHECK-SAME: inner_dims_pos = [3, 1, 0] // CHECK-SAME: inner_tiles = [5, 10, 5] @@ -451,13 +451,13 @@ func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change(%arg0: te // ----- -func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { +func.func @linalg_transpose_linalg_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { %transposed = linalg.transpose ins(%arg0 : tensor<?x?x?x?xf32>) outs(%transpose_dest : tensor<?x?x?x?xf32>) permutation = [2, 3, 0, 1] - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed outer_dims_perm = [3, 0, 2, 1] inner_dims_pos = [1, 3, 2] inner_tiles = [%tile_p, %tile_q, %tile_r] @@ -465,7 +465,7 @@ func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_s return %pack : tensor<?x?x?x?x?x?x?xf32> } // CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)> -//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes( +//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?x?xf32>, // CHECK-SAME: %[[ARG2:.+]]: tensor<?x?x?x?x?x?x?xf32>, %[[ARG3:.+]]: index, %[[ARG4:.+]]: index, %[[ARG5:.+]]: index) -> tensor<?x?x?x?x?x?x?xf32> { // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index @@ -480,12 +480,12 @@ func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_s // CHECK: %[[VAL1:.+]] = affine.apply #[[$MAP]]()[%[[DIM0]], %[[ARG4]]] // CHECK: %[[VAL2:.+]] = affine.apply #[[$MAP]]()[%[[DIM]], %[[ARG5]]] // CHECK: %[[VAL3:.+]] = tensor.empty(%[[VAL1]], %[[DIM1]], %[[VAL2]], %[[VAL0]], %[[ARG3]], %[[ARG4]], %[[ARG5]]) : tensor<?x?x?x?x?x?x?xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [1, 2, 0, 3] inner_dims_pos = [3, 1, 0] inner_tiles = [%[[ARG3]], %[[ARG4]], %[[ARG5]]] into %[[VAL3]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [1, 2, 0, 3] inner_dims_pos = [3, 1, 0] inner_tiles = [%[[ARG3]], %[[ARG4]], %[[ARG5]]] into %[[VAL3]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> // CHECK: return %[[PACK]] : tensor<?x?x?x?x?x?x?xf32> // ----- -func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> { +func.func @linalg_transpose_linalg_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : bf16 %dim = tensor.dim %arg0, %c0 : tensor<?x32x128xbf16> @@ -497,7 +497,7 @@ func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16> - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed padding_value(%cst : bf16) outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] @@ -506,14 +506,14 @@ func.func
@linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16> } // CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> -//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_multiple_tiles( +//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_multiple_tiles( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> { // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : bf16 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x32x128xbf16> // CHECK: %[[VAL0:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]] // CHECK: %[[VAL1:.+]] = tensor.empty(%[[VAL0]]) : tensor<32x?x64x16x2xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: padding_value(%[[CST]] : bf16) // CHECK-SAME: outer_dims_perm = [1, 0, 2] // CHECK-SAME: inner_dims_pos = [0, 2] @@ -524,23 +524,23 @@ func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16> // ----- -func.func @linalg_transpose_tensor_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> { +func.func @linalg_transpose_linalg_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> { %0 = tensor.empty() : tensor<1x1x16x4xi32> %transposed = linalg.transpose ins(%arg0 : tensor<1x1x4x16xi32>) outs(%0 : tensor<1x1x16x4xi32>) permutation = [1, 0, 3, 2] %1 = tensor.empty() : tensor<16x4xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %1 : tensor<1x1x16x4xi32> -> tensor<16x4xi32> return %unpack : tensor<16x4xi32> } -//CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold( +//CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<16x4xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [4, 16] @@ -550,23 +550,23 @@ func.func @linalg_transpose_tensor_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> t // ----- -func.func @linalg_transpose_tensor_unpack_fold_partial_tile(%arg0: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> { +func.func @linalg_transpose_linalg_unpack_fold_partial_tile(%arg0: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> { %0 = tensor.empty() : tensor<1x1x16x4xi32> %transposed = linalg.transpose ins(%arg0 : tensor<1x1x4x16xi32>) outs(%0 : tensor<1x1x16x4xi32>) permutation = [1, 0, 3, 2] %1 = tensor.empty() : tensor<15x3xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %1 : tensor<1x1x16x4xi32> -> tensor<15x3xi32> return %unpack : tensor<15x3xi32> } -//CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold_partial_tile( +//CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold_partial_tile( // CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<15x3xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [4, 16] @@ -576,20 +576,20 @@ func.func @linalg_transpose_tensor_unpack_fold_partial_tile(%arg0: tensor<1x1x4x // ----- -func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %unpack_dest: tensor<?x?xf32>,
%tile_p : index, %tile_q : index) -> tensor<?x?xf32> { +func.func @linalg_transpose_linalg_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %unpack_dest: tensor<?x?xf32>, %tile_p : index, %tile_q : index) -> tensor<?x?xf32> { %transposed = linalg.transpose ins(%arg0 : tensor<?x?x?x?xf32>) outs(%transpose_dest : tensor<?x?x?x?xf32>) permutation = [1, 0, 3, 2] - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [%tile_p, %tile_q] into %unpack_dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32> return %unpack : tensor<?x?xf32> } -// CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes( +// CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>, // CHECK-SAME: %[[IDX1:.+]]: index, %[[IDX2:.+]]: index) -> tensor<?x?xf32> { // CHECK-DAG: %[[CST1:.+]] = arith.constant 1 : index @@ -597,7 +597,7 @@ func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile // CHECK-DAG: %[[DIM0:.+]] = tensor.dim %[[ARG2]], %[[CST0]] : tensor<?x?xf32> // CHECK-DAG: %[[DIM1:.+]] = tensor.dim %[[ARG2]], %[[CST1]] : tensor<?x?xf32> // CHECK: %[[OUT:.+]] = tensor.empty(%[[DIM0]], %[[DIM1]]) : tensor<?x?xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [%[[IDX2]], %[[IDX1]]] @@ -607,9 +607,9 @@ func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile // ----- -func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> { +func.func @linalg_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> { %0 = tensor.empty() : tensor<56x3648xf32> - %pack = tensor.unpack %arg0 + %pack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 64] @@ -622,10 +622,10 @@ func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> permutation = [1,0] return %transposed : tensor<3648x56xf32> } -// CHECK-LABEL: func.func @tensor_unpack_linalg_transpose_fold( +// CHECK-LABEL: func.func @linalg_unpack_linalg_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x56xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [1, 64] @@ -637,7 +637,7 @@ func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16xf32>) -> tensor<100x71x64xf32> { %0 = tensor.empty() : tensor<71x100x64xf32> - %pack = tensor.unpack %arg0 + %pack = linalg.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %0 : tensor<71x7x4x16x16xf32> -> tensor<71x100x64xf32> @@ -652,7 +652,7 @@ func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16 // CHECK-LABEL: func.func @tensor_padded_unpack_linalg_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<71x7x4x16x16xf32>) -> tensor<100x71x64xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<100x71x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm
= [1, 0, 2] // CHECK-SAME: inner_dims_pos = [0, 2] // CHECK-SAME: inner_tiles = [16, 16] @@ -668,7 +668,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) - outs(%0 : tensor<5x2x3x16x4xi32>) permutation = [2, 0, 1, 4, 3] %1 = tensor.empty() : tensor<5x48x8xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 2, 1] inner_dims_pos = [1, 2] inner_tiles = [16, 4] into @@ -678,7 +678,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) - //CHECK-LABEL: func.func @non_involution_transpose_unpack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0] // CHECK-SAME: inner_dims_pos = [2, 1] // CHECK-SAME: inner_tiles = [4, 16] @@ -690,7 +690,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) - func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { %0 = tensor.empty() : tensor<3x56x3648xf32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [1, 64] @@ -706,7 +706,7 @@ func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) // CHECK-LABEL: func.func @unpack_non_involution_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] // CHECK-SAME: inner_dims_pos = [2, 0] // CHECK-SAME: inner_tiles = [1, 64] @@ -722,7 +722,7 @@ func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> ten outs(%0 : tensor<5x2x3x16x4xi32>) permutation = [2, 0, 4, 1, 3] %1 = tensor.empty() : tensor<5x32x12xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed inner_dims_pos = [1, 2] inner_tiles = [16, 4] into %1 : tensor<5x2x3x16x4xi32> -> tensor<5x32x12xi32> @@ -730,7 +730,7 @@ func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> ten } //CHECK-LABEL: func.func @transpose_unpacked_dims_no_fold( // CHECK: linalg.transpose -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -747,7 +747,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso linalg.yield %in : i32 } -> tensor<5x2x3x16x4xi32> %1 = tensor.empty() : tensor<5x48x8xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 2, 1] inner_dims_pos = [1, 2] inner_tiles = [16, 4] into @@ -757,7 +757,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso //CHECK-LABEL: func.func @generic_transpose_unpack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0] // CHECK-SAME: inner_dims_pos = [2, 1] // CHECK-SAME: inner_tiles = [4, 16] @@ -771,7 +771,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso #map1 = affine_map<(d0, d1, d2)->(d0, d1, d2)> 
func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { %0 = tensor.empty() : tensor<3x56x3648xf32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [1, 64] @@ -791,7 +791,7 @@ func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> ten // CHECK-LABEL: func.func @unpack_generic_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] // CHECK-SAME: inner_dims_pos = [2, 0] // CHECK-SAME: inner_tiles = [1, 64] diff --git a/mlir/test/Dialect/Tensor/tiling.mlir b/mlir/test/Dialect/Tensor/tiling.mlir index 193fbe93e0f9e..04a99b5fd0d68 100644 --- a/mlir/test/Dialect/Tensor/tiling.mlir +++ b/mlir/test/Dialect/Tensor/tiling.mlir @@ -224,495 +224,3 @@ module attributes {transform.with_named_sequence} { transform.yield } } - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> -// CHECK: func.func @NC_to_NCnc -// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) { -// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) { -// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [64, 128] [1, 1] : tensor<128x256xf32> to tensor<64x128xf32> -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32> -// CHECK: %[[SUB_RES:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32> -// CHECK: } -// CHECK: scf.yield %[[RES1:.*]] : tensor<4x8x32x32xf32> -// CHECK: } -// CHECK: return %[[RES0:.*]] : tensor<4x8x32x32xf32> -// CHECK: } -func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32> - return %0 : tensor<4x8x32x32xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)> -// CHECK: func.func @KC_to_CKkc -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: 
-// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index -// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]] -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) -// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK-SAME: [0, %[[IN_C]]] [128, 16] -// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8] -// CHECK: tensor.pack -// CHECK-SAME: %[[INPUT_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] -// CHECK-SAME: into %[[OUTPUT_SLICE]] -func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> - return %0 : tensor<32x4x32x8xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)> -// CHECK: func.func @pad_and_pack_static( -// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) { -// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]]) -// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]]) -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1] -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1] -// CHECK: %[[SUB_RES:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] -// CHECK-SAME: into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32> -// CHECK: } -// CHECK: return %[[RES0:.*]] : tensor<2x8x8x2xf32> -// CHECK: } -func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32> - return %0 : tensor<2x8x8x2xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// 
CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)> -// CHECK: func.func @pad_and_pack_partially_dynamic( -// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x8x2xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<?x?x8x2xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x8x2xf32> -// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x8x2xf32> -// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x8x2xf32>) { -// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x8x2xf32>) { -// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] -// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] -// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]]) -// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]] -// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]]) -// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]] -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor<?x?x8x2xf32> to tensor<?x?x8x2xf32> -// CHECK: %[[SUB_RES:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] -// CHECK-SAME: into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<?x?x8x2xf32> -// CHECK: } -// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x8x2xf32> -// CHECK: } -// CHECK: return %[[VAL_34:.*]] : tensor<?x?x8x2xf32> -// CHECK: } -func.func @pad_and_pack_partially_dynamic(%input: tensor<?x?xf32>, %output: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<?x?xf32> -> tensor<?x?x8x2xf32> - return %0 : tensor<?x?x8x2xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)> -// CHECK: func.func @pad_and_pack_fully_dynamic( -// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x?x?xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32, -// CHECK-SAME: %[[TILE_0:.*]]: index, -// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor<?x?x?x?xf32> { -// CHECK-DAG:
%[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x?x?xf32> -// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x?x?xf32> -// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x?x?xf32>) { -// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x?x?xf32>) { -// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] -// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] -// CHECK-DAG: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]] -// CHECK-DAG: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]] -// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]] -// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]] -// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]] -// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]] -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> -// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[ITER1]], %[[C2]] -// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[ITER1]], %[[C3]] -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32> -// CHECK: %[[PACK:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]] -// CHECK-SAME: into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<?x?x?x?xf32> -// CHECK: } -// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x?x?xf32> -// CHECK: } -// CHECK: return %[[RES0:.*]] : tensor<?x?x?x?xf32> -// CHECK: } -func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> { - %0 = tensor.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32> - return %0 : tensor<?x?x?x?xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)> -// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)> -// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)> -// CHECK: func.func @NCnc_to_NC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] =
arith.constant 4 : index -// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index -// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index -// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]] -// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]]) -// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]]) -// CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]]) -// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]]) -// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]]) -// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]]) -// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16] -// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor<?x?x32x16xf32> -// CHECK: %[[EMPTY:.+]] = tensor.empty -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] -// CHECK-SAME: into %[[EMPTY]] -// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] -// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] -// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4] -// CHECK: scf.yield %[[RES]] -func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> - return %0 : tensor<256x128xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 8)> -// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 8)> -// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 8 - d0 floordiv 8 + 1)> -// CHECK: func.func @CKkc_to_KC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index -// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C128]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C256]] step %[[C4]] -// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) -// CHECK-DAG: %[[OFFSET_K:.+]] = affine.apply #[[MAP1]](%[[K]]) -// CHECK-DAG: %[[IN_K_SZ:.+]] = affine.apply #[[MAP2]](%[[K]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP4]](%[[C]]) -// CHECK-DAG: %[[OFFSET_C:.+]] = affine.apply #[[MAP5]](%[[C]]) -// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP6]](%[[C]]) -// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], %[[IN_K_SZ]], 32, 8] -// CHECK:
%[[EMPTY:.+]] = tensor.empty -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] -// CHECK-SAME: into %[[EMPTY]] -// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] -// CHECK-SAME: [%[[OFFSET_K]], %[[OFFSET_C]]] [2, 4] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] -// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4] -// CHECK: scf.yield %[[RES]] -func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>) -> tensor<128x256xf32> { - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %dest : tensor<32x4x32x8xf32> -> tensor<128x256xf32> - return %0 : tensor<128x256xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 4)> -// CHECK: func.func @perfect_CKkc_to_KC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C128]] step %[[C4]] -// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP1]](%[[C]]) -// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 2, 4] -// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [2, 4] -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] -// CHECK-SAME: into %[[ITER_SLICE]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] -// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4] -// CHECK: scf.yield %[[RES]] -func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128xf32>) -> tensor<8x128xf32> { - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %dest : tensor<32x4x2x4xf32> -> tensor<8x128xf32> - return %0 : tensor<8x128xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 floordiv 2)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 ceildiv 2)> -// 
CHECK: func.func @dynamic_perfect_CKkc_to_KC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[DIM_0:.+]] = tensor.dim %[[OUT]], %[[C0]] -// CHECK-DAG: %[[DIM_1:.+]] = tensor.dim %[[OUT]], %[[C1]] -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[DIM_0]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[DIM_1]] step %[[C4]] -// CHECK-DAG: %[[OUT_K_SZ:.+]] = affine.min #[[MAP0]](%[[K]])[%[[DIM_0]]] -// CHECK-DAG: %[[OUT_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])[%[[DIM_1]]] -// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP2]](%[[K]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]]) -// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP3]](%[[OUT_C_SZ]]) -// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], 1, 2, 2] -// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] -// CHECK-SAME: into %[[ITER_SLICE]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] -// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] -// CHECK: scf.yield %[[RES]] - -func.func @dynamic_perfect_CKkc_to_KC(%source: tensor<?x?x2x2xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> { - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %dest : tensor<?x?x2x2xf32> -> tensor<?x?xf32> - return %0 : tensor<?x?xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)> -// CHECK: func.func @perfect_NKPQk_to_NPQK( -// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x4x6x6x2xf32>, -// CHECK-SAME: %{{.+}}: tensor<1x6x6x8xf32>) -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK: %{{.+}} = scf.for %[[P:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[Q:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C4]] -// CHECK: %[[K_SZ:.+]] = affine.apply #[[MAP]](%[[K]]) -// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[K_SZ]], %[[P]], %[[Q]], 0] -// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] -// CHECK-SAME: into %[[SLICE_DEST]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] -// CHECK-SAME: into %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] -// CHECK: scf.yield %[[RES]] - -func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1x6x6x8xf32>) -> tensor<1x6x6x8xf32> { - %0 =
tensor.unpack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x4x6x6x2xf32> -> tensor<1x6x6x8xf32> - return %0 : tensor<1x6x6x8xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -func.func private @get_dynamic_tile_size() -> index - -// CHECK-LABEL: func.func @fully_dynamic_unpack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DST:[0-9a-zA-Z]+]] -// CHECK: %[[INNER_TS:.+]] = call @get_dynamic_tile_size() : () -> index -// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[DST]]) -// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SRC]] -// CHECK: %[[EMPTY:.+]] = tensor.empty -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SLICE]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [%[[INNER_TS]], %[[INNER_TS]]] into %[[EMPTY]] -func.func @fully_dynamic_unpack(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> { - %0 = func.call @get_dynamic_tile_size() : () -> index - %1 = tensor.unpack %source inner_dims_pos = [1, 0] inner_tiles = [%0, %0] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32> - return %1 : tensor<?x?xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK: func.func @perfect_NPQK_to_NKPQk -// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x6x6x8xf32>, -// CHECK-SAME: %{{.+}}: tensor<1x4x6x6x2xf32>) -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index -// CHECK: %{{.+}} = scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[ARG4:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[ARG6:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP1]](%[[ARG2]]) -// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[ARG4]], %[[ARG6]], %[[APPLY]]] -// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] -// CHECK: %[[PACK:.+]] = tensor.pack -// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] -// CHECK-SAME: into %[[SLICE_DEST]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[PACK]] -// CHECK-SAME: into %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] -// CHECK: scf.yield %[[RES]] - -func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4x6x6x2xf32>) -> tensor<1x4x6x6x2xf32> { - %0 = tensor.pack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles
= [2] into %dest : tensor<1x6x6x8xf32> -> tensor<1x4x6x6x2xf32> - return %0 : tensor<1x4x6x6x2xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir index a0fd3f7d87083..bca94d4a64416 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir @@ -22,7 +22,7 @@ // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s -/// End-to-end test for tensor.pack where one of the inner tile sizes is +/// End-to-end test for linalg.pack where one of the inner tile sizes is /// scalable. func.func @main() { @@ -60,7 +60,7 @@ func.func private @pack(%A: tensor<7x16xi32>) { %A_pack_empty = tensor.empty(%c1, %tile_size) : tensor<?x16x?x1xi32> - %A_pack = tensor.pack %A + %A_pack = linalg.pack %A padding_value(%pad_val : i32) inner_dims_pos = [0, 1] inner_tiles = [%tile_size, 1] @@ -117,9 +117,9 @@ func.func private @pack(%A: tensor<7x16xi32>) { module @transforms attributes { transform.with_named_sequence } { transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op + %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op - // 1. Tile so that we can decompose tensor.pack into tensor.pad and other + // 1. Tile so that we can decompose linalg.pack into tensor.pad and other // Ops (see step 2) %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir index 15edae8b6d3f8..a8daa0b855d00 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir @@ -8,7 +8,7 @@ // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s -/// End-to-end test for tensor.pack where one of the inner tile sizes is +/// End-to-end test for linalg.pack where one of the inner tile sizes is /// dynamic.
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
index 15edae8b6d3f8..a8daa0b855d00 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
@@ -8,7 +8,7 @@
 
 // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s
 
-/// End-to-end test for tensor.pack where one of the inner tile sizes is
+/// End-to-end test for linalg.pack where one of the inner tile sizes is
 /// dynamic.
 
 func.func @main() {
@@ -38,7 +38,7 @@ func.func private @pack(%A: tensor<7x16xi32>) {
   %tile_size = arith.constant 8 : index
   %A_pack_empty = tensor.empty(%c1, %tile_size) : tensor<?x16x?x1xi32>
 
-  %A_pack = tensor.pack %A
+  %A_pack = linalg.pack %A
     padding_value(%pad_val : i32)
     inner_dims_pos = [0, 1]
     inner_tiles = [%tile_size, 1]
@@ -78,9 +78,9 @@ func.func private @pack(%A: tensor<7x16xi32>) {
 
 module @transforms attributes { transform.with_named_sequence } {
   transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) {
-    %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op
+    %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op
 
-    // 1. Tile so that we can decompose tensor.pack into tensor.pad and other
+    // 1. Tile so that we can decompose linalg.pack into tensor.pad and other
     // Ops (see step 2)
     %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
index 63622d761bc5b..05e678227de32 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
@@ -12,9 +12,9 @@
 /// End-to-end test for computing matrix-multiplication using linalg.mmt4d. In
 /// particular, demonstrates how the following MLIR sequence (implemented in @mmt4d):
 ///
-///   A_pack = tensor.pack A
-///   B_pack = tensor.pack B
-///   C_pack = tensor.pack C
+///   A_pack = linalg.pack A
+///   B_pack = linalg.pack B
+///   C_pack = linalg.pack C
 ///   out_pack = linalg.mmt4d(A_pack, B_pack, C_pack)
 ///
 /// is equivalent to:
@@ -86,16 +86,16 @@ func.func private @mmt4d(%A: tensor<7x16xi32>, %B: tensor<16x13xi32>, %C: tensor
   %C_pack_empty = tensor.empty() : tensor<2x2x8x8xi32>
 
   // Pack matrices
-  %A_pack = tensor.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<2x16x8x1xi32>
-  %B_pack = tensor.pack %B padding_value(%zero : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %B_pack_empty : tensor<16x13xi32> -> tensor<2x16x8x1xi32>
-  %C_pack = tensor.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
+  %A_pack = linalg.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<2x16x8x1xi32>
+  %B_pack = linalg.pack %B padding_value(%zero : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %B_pack_empty : tensor<16x13xi32> -> tensor<2x16x8x1xi32>
+  %C_pack = linalg.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
 
   // MMT4D
   %mmt4d = linalg.mmt4d ins(%A_pack, %B_pack : tensor<2x16x8x1xi32>, tensor<2x16x8x1xi32>) outs(%C_pack : tensor<2x2x8x8xi32>) -> tensor<2x2x8x8xi32>
 
   // Unpack output
   %C_out_empty = tensor.empty() : tensor<7x13xi32>
-  %C_out_unpack = tensor.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
+  %C_out_unpack = linalg.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
 
   return %C_out_unpack : tensor<7x13xi32>
 }
 
@@ -146,16 +146,16 @@ module @transforms attributes { transform.with_named_sequence } {
     transform.apply_patterns.canonicalization
   } : !transform.op<"func.func">
 
-  // Step 4. Lower tensor.pack
-  %pack = transform.structured.match ops{["tensor.pack"]} in %func_h
-    : (!transform.op<"func.func">) -> !transform.op<"tensor.pack">
-  transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+  // Step 4. Lower linalg.pack
+  %pack = transform.structured.match ops{["linalg.pack"]} in %func_h
+    : (!transform.op<"func.func">) -> !transform.op<"linalg.pack">
+  transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
     -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">,
         !transform.op<"linalg.transpose">)
 
-  // Step 5. Lower tensor.unpack
-  %unpack = transform.structured.match ops{["tensor.unpack"]} in %func_h
-    : (!transform.op<"func.func">) -> !transform.op<"tensor.unpack">
-  transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+  // Step 5. Lower linalg.unpack
+  %unpack = transform.structured.match ops{["linalg.unpack"]} in %func_h
+    : (!transform.op<"func.func">) -> !transform.op<"linalg.unpack">
+  transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
     -> (!transform.op<"tensor.empty">,
        !transform.op<"linalg.transpose">,
        !transform.op<"tensor.collapse_shape">,
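The mmt4d test above also doubles as a compact reference for the renamed ops' semantics: with a padding_value, linalg.pack pads the 7x13 accumulator out to a 2x2 grid of 8x8 tiles, and linalg.unpack drops that padding again. The same round trip in isolation (a sketch reusing the test's attributes and shapes; the function itself is hypothetical and not part of the patch):

  func.func @pack_unpack_roundtrip(%C: tensor<7x13xi32>) -> tensor<7x13xi32> {
    %zero = arith.constant 0 : i32
    // Tile into 8x8 inner tiles, filling the remainder with %zero.
    %C_pack_empty = tensor.empty() : tensor<2x2x8x8xi32>
    %C_pack = linalg.pack %C padding_value(%zero : i32)
        outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8]
        into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
    // Unpack restores the original 7x13 shape, discarding the padding.
    %C_out_empty = tensor.empty() : tensor<7x13xi32>
    %C_out = linalg.unpack %C_pack
        outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8]
        into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
    return %C_out : tensor<7x13xi32>
  }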
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
index 4395dfe74914e..c5360ee1ec954 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
@@ -8,7 +8,7 @@
 
 // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s
 
-/// End-to-end test for tensor.unpack where one of the inner tile sizes is
+/// End-to-end test for linalg.unpack where one of the inner tile sizes is
 /// dynamic.
 
 func.func @main() {
@@ -56,7 +56,7 @@ func.func private @unpack(%A: tensor<?x?x?x1xi32>) {
   %tile_size = arith.constant 8 : index
   %A_unpack_empty = tensor.empty() : tensor<7x3xi32>
 
-  %A_unpack = tensor.unpack %A
+  %A_unpack = linalg.unpack %A
     inner_dims_pos = [0, 1]
     inner_tiles = [%tile_size, 1]
     into %A_unpack_empty : tensor<?x?x?x1xi32> -> tensor<7x3xi32>
@@ -78,9 +78,9 @@ func.func private @unpack(%A: tensor<?x?x?x1xi32>) {
 
 module @transforms attributes { transform.with_named_sequence } {
   transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) {
-    %pack = transform.structured.match ops{["tensor.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
+    %pack = transform.structured.match ops{["linalg.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
 
-    // 1. Tile so that we can decompose tensor.pack
+    // 1. Tile so that we can decompose linalg.pack
     // Ops (see step 2)
     %c8 = transform.param.constant 8 : i64 -> !transform.param<i64>
     %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [%c8, 1]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
index 2d35be403ef99..8ce05d94c4ad0 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
@@ -211,7 +211,7 @@ module {
       linalg.yield %7, %8 : f32, f32
     } -> (tensor<64x64xf32>, tensor<64x64xf32>)
     %5 = tensor.empty() : tensor<2048xf32>
-    %unpack = tensor.unpack %0#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %5 : tensor<64x32xf32> -> tensor<2048xf32>
+    %unpack = linalg.unpack %0#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %5 : tensor<64x32xf32> -> tensor<2048xf32>
     return %4#1, %unpack : tensor<64x64xf32>, tensor<2048xf32>
   }
 }
@@ -254,7 +254,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]]#1 into %[[ELEM_OUT_ARG_1]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1]
 // CHECK: }
 // CHECK: }
-// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[FINAL_RESULT]]#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %{{.*}} : tensor<64x32xf32> -> tensor<2048xf32>
+// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[FINAL_RESULT]]#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %{{.*}} : tensor<64x32xf32> -> tensor<2048xf32>
 // CHECK: return %[[FINAL_RESULT]]#3, %[[UNPACK]] :
 
 // -----
@@ -278,7 +278,7 @@ module {
     }
   }
   %output = tensor.empty() : tensor<2048xf32>
-  %unpack = tensor.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2048xf32>
+  %unpack = linalg.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2048xf32>
   return %unpack : tensor<2048xf32>
 }
 }
@@ -308,7 +308,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-DAG: %[[UNPACK_RESULT_OFFSET:.*]] = affine.apply #[[UNPACK_RESULT_OFFSET_MAP]](%[[IV1]])
 // CHECK-DAG: %[[UNPACK_RESULT_SIZE:.*]] = affine.min #[[UNPACK_RESULT_SIZE_MAP]](%[[IV1]])
 // CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [%[[UNPACK_RESULT_SIZE]]] [1]
-// CHECK: %[[TILED_UNPACK_OUT:.*]] = tensor.unpack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_UNPACK_OUT:.*]] = linalg.unpack %[[GENERIC_OUT]]
 // CHECK-SAME: outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32]
 // CHECK-SAME: into %[[TILED_UNPACK_DEST]]
 // CHECK: scf.forall.in_parallel {
@@ -339,7 +339,7 @@ module {
     }
   }
   %output = tensor.empty() : tensor<2047xf32>
-  %unpack = tensor.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2047xf32>
+  %unpack = linalg.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2047xf32>
   return %unpack : tensor<2047xf32>
 }
 }
@@ -369,7 +369,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-DAG: %[[UNPACK_RESULT_OFFSET:.*]] = affine.apply #[[UNPACK_RESULT_OFFSET_MAP]](%[[IV1]])
 // CHECK-DAG: %[[UNPACK_RESULT_SIZE:.*]] = affine.min #[[UNPACK_RESULT_SIZE_MAP]](%[[IV1]])
 // CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [%[[UNPACK_RESULT_SIZE]]] [1]
-// CHECK: %[[TILED_UNPACK_OUT:.*]] = tensor.unpack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_UNPACK_OUT:.*]] = linalg.unpack %[[GENERIC_OUT]]
 // CHECK-SAME: outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32]
 // CHECK-SAME: into %[[TILED_UNPACK_DEST]]
 // CHECK: scf.forall.in_parallel {
@@ -400,7 +400,7 @@ module {
    }
  }
   %output = tensor.empty() : tensor<4x32x16xf32>
-  %pack = tensor.pack %1 inner_dims_pos = [0] inner_tiles = [16] into %output : tensor<64x32xf32> -> tensor<4x32x16xf32>
+  %pack = linalg.pack %1 inner_dims_pos = [0] inner_tiles = [16] into %output : tensor<64x32xf32> -> tensor<4x32x16xf32>
   return %pack : tensor<4x32x16xf32>
 }
 }
@@ -428,7 +428,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME: outs(%[[GENERIC_OUT_SLICE]] :
 // CHECK: %[[PACK_RESULT_OFFSET:.*]] = affine.apply #[[PACK_RESULT_MAP]](%[[IV1]])
 // CHECK: %[[TILED_PACK_DEST:.*]] = tensor.extract_slice %[[PACK_OUT_ARG]][%[[PACK_RESULT_OFFSET]], %[[IV2]], 0] [2, 32, 16] [1, 1, 1]
-// CHECK: %[[TILED_PACK_OUT:.*]] = tensor.pack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_PACK_OUT:.*]] = linalg.pack %[[GENERIC_OUT]]
 // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16]
 // CHECK-SAME: into %[[TILED_PACK_DEST]]
 // CHECK: scf.forall.in_parallel {
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
index 5f7663af773a4..bc27840fdf5e9 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -591,7 +591,7 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
 func.func @imperfect_unpack_producer_fusion(%source: tensor<1x1x288x8x4xf32>, %dest: tensor<1x2x1152xf32>) -> tensor<1x2x1152xf32> {
-  %0 = tensor.unpack %source
+  %0 = linalg.unpack %source
       outer_dims_perm = [0, 1, 2]
       inner_dims_pos = [1, 2]
       inner_tiles = [8, 4]
       into %dest
@@ -625,7 +625,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME: %[[ARG1:.+]]: tensor<1x2x1152xf32>
 // CHECK: %[[FOR_RESULT:.+]] = scf.for{{.*}}iter_args(%[[ITER_ARG:.+]] = {{.*}})
 // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SLICE]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SLICE]]
 // CHECK-DAG: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
 // CHECK-DAG: %[[INIT_SLICE:.+]] = tensor.extract_slice %[[ITER_ARG]]
 // CHECK: %[[GENERIC:.+]] = linalg.generic
diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir
index 5133c14414c97..c1604e226a334 100644
--- a/mlir/test/Transforms/loop-invariant-code-motion.mlir
+++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir
@@ -1163,18 +1163,18 @@ func.func @speculate_ceildivsi_range(
 
 func.func @speculate_static_pack_and_unpack(%source: tensor<128x256xf32>,
     %dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) {
-  // CHECK: tensor.pack
+  // CHECK: linalg.pack
   // CHECK-NEXT: scf.for
   scf.for %i = %lb to %ub step %step {
-    %packed = tensor.pack %source
+    %packed = linalg.pack %source
       inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest
       : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
   }
 
-  // CHECK: tensor.unpack
+  // CHECK: linalg.unpack
   // CHECK-NEXT: scf.for
   scf.for %i = %lb to %ub step %step {
-    %unpacked = tensor.unpack %dest
+    %unpacked = linalg.unpack %dest
       inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source
       : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
   }
@@ -1188,25 +1188,25 @@ func.func @speculate_dynamic_pack_and_unpack(%source: tensor<?x?xf32>,
     %tile_m: index, %tile_n: index, %pad: f32) {
 
   // CHECK: scf.for
-  // CHECK-NEXT: tensor.pack
+  // CHECK-NEXT: linalg.pack
   scf.for %i = %lb to %ub step %step {
-    %packed = tensor.pack %source
+    %packed = linalg.pack %source
      inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest
      : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
  }
 
   // CHECK: scf.for
-  // CHECK-NEXT: tensor.unpack
+  // CHECK-NEXT: linalg.unpack
   scf.for %i = %lb to %ub step %step {
-    %unpacked = tensor.unpack %dest
+    %unpacked = linalg.unpack %dest
      inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %source
      : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
  }
 
-  // CHECK: tensor.pack
+  // CHECK: linalg.pack
   // CHECK-NEXT: scf.for
   scf.for %i = %lb to %ub step %step {
-    %packed = tensor.pack %source padding_value(%pad : f32)
+    %packed = linalg.pack %source padding_value(%pad : f32)
      inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest
      : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
  }
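The expectations in this LICM test follow from how speculatability is defined for the relayout ops: a fully static linalg.pack/linalg.unpack cannot fail at runtime and may be hoisted, a pack with dynamic inner tiles may only be hoisted when a padding_value makes the result well defined for any tile size (the third loop), and the dynamic unpack in between stays inside its loop. A minimal sketch of the hoistable static case (hypothetical IR mirroring the test; after LICM the pack is expected to sit in front of the loop):

  func.func @hoistable_static_pack(%source: tensor<128x256xf32>,
      %dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) {
    // Loop-invariant and always valid: LICM may move this pack above the loop.
    scf.for %i = %lb to %ub step %step {
      %packed = linalg.pack %source
        inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest
        : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
    }
    return
  }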
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index fa2a27dcfa991..046b9a65f3359 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -74,8 +74,9 @@ struct TestLinalgTransforms
       *this, "test-decompose-pad-tensor",
       llvm::cl::desc("Test transform pad tensor by copying with generic ops"),
       llvm::cl::init(false)};
+  // TODO: This is not used - delete.
   Option<bool> testDecomposeTensorPackOp{
-      *this, "test-decompose-tensor-pack",
+      *this, "test-decompose-linalg-pack",
       llvm::cl::desc("Test transform that generalizes pack ops into a sequence "
                      "of tensor and Linalg ops"),
       llvm::cl::init(false)};
@@ -130,6 +131,14 @@ struct TestLinalgTransforms
   Option<bool> testDecomposeWinogradOps{
       *this, "test-decompose-winograd-ops",
       llvm::cl::desc("Test decompose Winograd ops"), llvm::cl::init(false)};
+  Option<bool> testFoldIntoPackAndUnpack{
+      *this, "test-fold-into-pack-and-unpack",
+      llvm::cl::desc("Test folding ops into linalg.pack and linalg.unpack"),
+      llvm::cl::init(false)};
+  Option<bool> testSimplifyPackUnpackPatterns{
+      *this, "test-simplify-pack-unpack-patterns",
+      llvm::cl::desc("Test patterns to simplify linalg.pack and linalg.unpack"),
+      llvm::cl::init(false)};
 };
 } // namespace
 
@@ -227,6 +236,18 @@ static void applyDecomposeWinogradOps(func::FuncOp funcOp) {
   (void)applyPatternsGreedily(funcOp, std::move(patterns));
 }
 
+static void applyFoldIntoPackAndUnpackPatterns(Operation *rootOp) {
+  RewritePatternSet patterns(rootOp->getContext());
+  linalg::populateFoldIntoPackAndUnpackPatterns(patterns);
+  (void)applyPatternsGreedily(rootOp, std::move(patterns));
+}
+
+static void applySimplifyPackUnpackPatterns(Operation *rootOp) {
+  RewritePatternSet patterns(rootOp->getContext());
+  linalg::populateSimplifyPackAndUnpackPatterns(patterns);
+  (void)applyPatternsGreedily(rootOp, std::move(patterns));
+}
+
 /// Apply transformations specified as patterns.
 void TestLinalgTransforms::runOnOperation() {
   if (testPatterns)
@@ -255,6 +276,11 @@ void TestLinalgTransforms::runOnOperation() {
     return applyWinogradConv2D(getOperation());
   if (testDecomposeWinogradOps)
     return applyDecomposeWinogradOps(getOperation());
+  Operation *rootOp = getOperation();
+  if (testFoldIntoPackAndUnpack)
+    applyFoldIntoPackAndUnpackPatterns(rootOp);
+  if (testSimplifyPackUnpackPatterns)
+    applySimplifyPackUnpackPatterns(rootOp);
 }
 
 namespace mlir {
diff --git a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
index 173bfd8955f2b..e435130c2a417 100644
--- a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
@@ -77,11 +77,6 @@ struct TestTensorTransforms
       llvm::cl::desc("Test folding of expand_shape/collapse_shape"),
       llvm::cl::init(false)};
 
-  Option<bool> testFoldIntoPackAndUnpack{
-      *this, "test-fold-into-pack-and-unpack",
-      llvm::cl::desc("Test folding ops into tensor.pack and tensor.unpack"),
-      llvm::cl::init(false)};
-
   Option<bool> useForeach{
       *this, "use-foreach",
       llvm::cl::desc(
@@ -89,11 +84,6 @@ struct TestTensorTransforms
           "Use the scf.forall operation when generating loop nests for "
           "the extract_slice of collapse_shape pattern"),
       llvm::cl::init(false)};
 
-  Option<bool> testSimplifyPackUnpackPatterns{
-      *this, "test-simplify-pack-unpack-patterns",
-      llvm::cl::desc("Test patterns to simplify tensor.pack and tensor.unpack"),
-      llvm::cl::init(false)};
-
   Option<bool> testTrackingListener{
       *this, "test-tracking-listener",
       llvm::cl::desc("Test tensor TrackingListener for the transform dialect"),
@@ -113,12 +103,6 @@ static void applyBubbleUpExpandShapePatterns(Operation *rootOp) {
   (void)applyPatternsGreedily(rootOp, std::move(patterns));
 }
 
-static void applyFoldIntoPackAndUnpackPatterns(Operation *rootOp) {
-  RewritePatternSet patterns(rootOp->getContext());
-  tensor::populateFoldIntoPackAndUnpackPatterns(patterns);
-  (void)applyPatternsGreedily(rootOp, std::move(patterns));
-}
-
 static void applyFoldConstantExtractSlicePatterns(Operation *rootOp) {
   RewritePatternSet patterns(rootOp->getContext());
   tensor::ControlConstantExtractSliceFusionFn controlFn =
@@ -148,12 +132,6 @@ applyDropRedundantInsertSliceRankExpansionPatterns(Operation *rootOp) {
   (void)applyPatternsGreedily(rootOp, std::move(patterns));
 }
 
-static void applySimplifyPackUnpackPatterns(Operation *rootOp) {
-  RewritePatternSet patterns(rootOp->getContext());
-  tensor::populateSimplifyPackAndUnpackPatterns(patterns);
-  (void)applyPatternsGreedily(rootOp, std::move(patterns));
-}
-
 namespace {
 /// Base pattern to rewrite a `tensor.collapse_shape -> tensor.extract_slice`.
 /// The `tensor.extract_slice` is replaced by a loop or gather operation that
@@ -387,8 +365,6 @@ static LogicalResult testTrackingListenerReplacements(Operation *rootOp) {
 
 void TestTensorTransforms::runOnOperation() {
   Operation *rootOp = getOperation();
-  if (testSimplifyPackUnpackPatterns)
-    applySimplifyPackUnpackPatterns(rootOp);
   if (testFoldConstantExtractSlice)
     applyFoldConstantExtractSlicePatterns(rootOp);
   if (testFoldConsecutiveInsertExtractSlice)
@@ -399,8 +375,6 @@ void TestTensorTransforms::runOnOperation() {
     applyReassociativeReshapeFoldingPatterns(rootOp);
   if (testBubbleUpExpandShapePatterns)
     applyBubbleUpExpandShapePatterns(rootOp);
-  if (testFoldIntoPackAndUnpack)
-    applyFoldIntoPackAndUnpackPatterns(rootOp);
   if (testRewriteExtractSliceWithTiledCollapseShape) {
     if (failed(
             applyRewriteExtractFromCollapseShapePatterns(rootOp, useForeach)))
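Beyond renaming the test flags, the last two files record that the fold/simplify pattern sets now live under linalg:: rather than tensor::. One rewrite the moved populateFoldIntoPackAndUnpackPatterns set is expected to perform is absorbing a rank-preserving tensor.extract_slice into the unpack that feeds it (a hypothetical input for illustration; the function name and shapes are made up):

  func.func @fold_extract_slice_into_unpack(
      %src: tensor<28x2x16xf32>, %dest: tensor<28x32xf32>) -> tensor<28x28xf32> {
    %unpack = linalg.unpack %src
        outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [16]
        into %dest : tensor<28x2x16xf32> -> tensor<28x32xf32>
    // The slice is the unpack's only use and keeps the rank, so the pattern
    // is expected to fold it away by unpacking directly into a 28x28 dest.
    %slice = tensor.extract_slice %unpack[0, 0] [28, 28] [1, 1]
        : tensor<28x32xf32> to tensor<28x28xf32>
    return %slice : tensor<28x28xf32>
  }

The relocated -test-fold-into-pack-and-unpack and -test-simplify-pack-unpack-patterns options above exist to exercise exactly these pattern sets from mlir-opt.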