diff --git a/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt index 74a05291376b3..cd14fe5c04561 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt @@ -1,8 +1,2 @@ add_mlir_dialect(TensorOps tensor) add_mlir_doc(TensorOps TensorOps Dialects/ -gen-dialect-doc) - -set(LLVM_TARGET_DEFINITIONS TensorInterfaces.td) -mlir_tablegen(TensorInterfaces.h.inc -gen-op-interface-decls) -mlir_tablegen(TensorInterfaces.cpp.inc -gen-op-interface-defs) -add_public_tablegen_target(MLIRTensorInterfacesIncGen) -add_dependencies(mlir-headers MLIRTensorInterfacesIncGen) diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h index b3ec796a72337..eb550bb469b9f 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h +++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h @@ -46,12 +46,6 @@ SmallVector getOrCreateRanges(OffsetSizeAndStrideOpInterface op, #include "mlir/Dialect/Tensor/IR/TensorOpsDialect.h.inc" -//===----------------------------------------------------------------------===// -// Tensor Interfaces -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Tensor/IR/TensorInterfaces.h.inc" - //===----------------------------------------------------------------------===// // Tensor Dialect Operations //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorInterfaces.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorInterfaces.td deleted file mode 100644 index 522a9c56f3c92..0000000000000 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorInterfaces.td +++ /dev/null @@ -1,33 +0,0 @@ -//===- TensorInterfaces.td - Tensor Interfaces Declaration -*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is the definition file for the structured interfaces for Tensor ops. -// -//===----------------------------------------------------------------------===// - -#ifndef TENSOR_IR_TENSORINTERFACES -#define TENSOR_IR_TENSORINTERFACES - -include "mlir/Interfaces/DestinationStyleOpInterface.td" -include "mlir/IR/OpBase.td" - -// TODO: To be moved to LinalgInterfaces.td, see: -// * https://github.com/llvm/llvm-project/pull/123902 -// * https://discourse.llvm.org/t/rfc-move-tensor-pack-and-tensor-unpack-into-linalg/ -def TensorRelayoutOpInterface : OpInterface<"RelayoutOpInterface"> { - let description = [{ - A Tensor (soon to be Linalg) relayout-op is either tensor.pack or - tensor.unpack. - - While we could extend this interface with methods from Tensor_RelayoutOp, - this is currently not needed and left as a TODO.
- }]; - let cppNamespace = "::mlir::tensor"; -} - -#endif // TENSOR_IR_TENSORINTERFACES diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index f6927f5ebcfb8..35d0b16628417 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -10,7 +10,6 @@ #define TENSOR_OPS include "mlir/Dialect/Tensor/IR/TensorBase.td" -include "mlir/Dialect/Tensor/IR/TensorInterfaces.td" include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/DestinationStyleOpInterface.td" @@ -1824,315 +1823,6 @@ def Tensor_SplatOp : Tensor_Op<"splat", [ let hasVerifier = 1; } -//===----------------------------------------------------------------------===// -// RelayoutOp -//===----------------------------------------------------------------------===// - -class Tensor_RelayoutOp traits = []> : - Tensor_Op, - DestinationStyleOpInterface, - ConditionallySpeculatable, NoMemoryEffect, - DeclareOpInterfaceMethods, - TensorRelayoutOpInterface, - TypesMatchWith<"result type matches type of dest", - "dest", "result", - "$_self">])> { - - code commonExtraClassDeclaration = [{ - size_t getSourceRank() { return getSourceType().getRank(); }; - size_t getDestRank() { return getDestType().getRank(); }; - RankedTensorType getSourceType() { - return ::llvm::cast(getSource().getType()); }; - RankedTensorType getDestType() { - return ::llvm::cast(getDest().getType()); }; - - MutableOperandRange getDpsInitsMutable() { return getDestMutable(); } - - /// Interface method for ConditionallySpeculatable. - Speculation::Speculatability getSpeculatability(); - - /// Return a mapping from positions `inner_dims_pos` to their - /// tile factors. - DenseMap getDimAndTileMapping(); - - /// Return the tile sizes as OpFoldResult. - SmallVector getMixedTiles(); - - /// Return the tile sizes as `int64_t`. If a tile size is dynamic - /// a sentinel `kDynamic` is introduced at that position in - /// the returned vector. - SmallVector getStaticTiles(); - - /// Retrieve all outer dims for this Pack/UnPack Op, i.e. all the leading - /// dims excluding the trailing dims corresponding to `innerTiles`. Note - /// that this will include both tiled and non-tiled dimensions. The order - /// of the output dimensions is consistent with the shape of the packed - /// tensor. - ArrayRef getAllOuterDims(); - - /// Similar to `getAllOuterDims`, but only retrieve the outer dims that - /// have been tiled. Also, the order of the output dimensions is consistent - /// with `inner_dims_pos` rather than the packed tensor. - SmallVector getTiledOuterDims(); - }]; - - let hasVerifier = 1; -} - -//===----------------------------------------------------------------------===// -// PackOp -//===----------------------------------------------------------------------===// - -def Tensor_PackOp : Tensor_RelayoutOp<"pack", [ - AttrSizedOperandSegments]> { - let summary = "tensor pack operation"; - let description = [{ - The "pack" operation converts a source tensor of rank `n` into a result - tensor of rank `n + k` with a tiled and packed layout (maybe with padding) - and optionally transposes the tiled source tensor dimensions. - - `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are - being tiled, where `0 < k <= n`. 
The order of the dimensions matters: - - The tiled dimensions (of size `inner_tiles`) are added to the end of the result - tensor in the order in which they appear in `inner_dims_pos`. - - `inner_dims_pos[i]` specifies the source tensor dimension tiled by - `inner_tiles[i]`. - - `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes - correspond to the least significant ("inner") result tensor dimension sizes, - in the same order. Tile sizes can be static or dynamic. - - Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of - `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled - by 16 and the 1st source dimension is tiled by 32. Other source dimensions - (if any) are not tiled. If `inner_dims_pos = [1, 0]`, the 1st dimension is - tiled by 16 and the 0th dimension is tiled by 32. - - Example: - ```mlir - // NC to NCnc - %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32] - into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32> - // \ / \ / - // outer dims inner dims - ``` - - `outer_dims_perm` (optional) specifies a permutation for the outer - dimensions. If specified, it must have `n` elements. - - Example: - ```mlir - // CK to KCck - %0 = tensor.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest - : tensor<128x256xf32> -> tensor<8x16 x 8x32 xf32> - // \ / - // compare with "NC to NCnc": outer dims are transposed - ``` - - `padding_value` specifies a padding value at the boundary on non-perfectly - divisible dimensions. Padding is optional: - - If absent, it is UB if the tile does not perfectly divide the dimension. - - If present, it will pad along high dimensions (high-padding) to make the - tile complete. - - Example: - ```mlir - %0 = tensor.pack %arg0 padding_value(%pad : f32) outer_dims_perm = [2, 1, 0] - inner_dims_pos = [1] inner_tiles = [2] into %arg1 - : tensor<200x127x256xf32> -> tensor<256x64x200x2xf32> - // \ - // padded and tiled dim - // - // Source dimension 1 is tiled. 64 does not divide 127 evenly, so 1 padded - // element is added at the end. - // - // Note: Only tiled dimensions can be padded. - ``` - }]; - let arguments = (ins AnyRankedTensor:$source, - AnyRankedTensor:$dest, - Optional:$padding_value, - DefaultValuedOptionalAttr:$outer_dims_perm, - DenseI64ArrayAttr:$inner_dims_pos, - Variadic:$inner_tiles, - DenseI64ArrayAttr:$static_inner_tiles); - let results = (outs AnyRankedTensor:$result); - let assemblyFormat = [{ - $source - (`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)? - (`outer_dims_perm` `=` $outer_dims_perm^)? - `inner_dims_pos` `=` $inner_dims_pos - `inner_tiles` `=` - custom($inner_tiles, $static_inner_tiles) - `into` $dest attr-dict `:` type($source) `->` type($dest) - }]; - - let builders = [ - OpBuilder<(ins "Value":$source, "Value":$dest, - "ArrayRef":$innerDimsPos, - "ArrayRef":$innerTiles, - CArg<"std::optional", "std::nullopt">:$paddingValue, - CArg<"ArrayRef", "{}">:$outerDimsPerm)> - ]; - - let extraClassDeclaration = commonExtraClassDeclaration # [{ - // Method to get the shape of the result as `SmallVector`. - // This is a static method to allow getting the shape of the destination - // expected while creating a `pack` op. 
- static SmallVector getResultShape(OpBuilder &builder, - Location loc, ArrayRef sourceDims, - ArrayRef innerTileDims, ArrayRef innerDimsPos, - ArrayRef outerDimsPerm = {}); - - // Method to get the `RankedTensorType` of the result based on the inner - // tiles, position of the inner tiles (innerDimsPos) and interchange vector - // of outer loops (outerDimsPerm). - static RankedTensorType inferPackedType(RankedTensorType sourceType, - ArrayRef innerTileSizes, ArrayRef innerDimsPos, - ArrayRef outerDimsPerm = {}); - - // Returns true if we have enough static information to catch undefined - // behavior when the tile size does not divide perfectly the dimension of - // the input tensor. Detecting UB requires that the input size and either - // corresponding tile or output size are static. - static bool requirePaddingValue(ArrayRef inputShape, - ArrayRef innerDimsPos, - ArrayRef outputShape, - ArrayRef outerDimsPerm, - ArrayRef innerTiles); - - static Value createDestinationTensor(OpBuilder &b, Location loc, - Value source, ArrayRef innerTileSizes, - ArrayRef innerDimsPos, ArrayRef outerDimsPerm); - - /// Build and return a new PackOp that is a clone of the current PackOp with - /// (innerDimsPos, innerTiles) (resp. outerDimsPerm) are permuted by - /// innerPermutation (resp. outerPermutation). - /// A new `tensor.empty` of the proper shape is built in the process. - /// Asserts that: - /// - At least one of innerPermutation or outerPermutation is non-empty. - /// - If not empty, innerPermutation is a valid permutation of size - /// matching innerDimPos. - /// - If not empty, outerPermutation is a valid permutation of size - /// matching outerDimsPerm. - PackOp createTransposedClone(OpBuilder &b, - Location loc, - ArrayRef innerPermutation, - ArrayRef outerPermutation); - - /// Check if this PackOp is like a simple pad operation. - /// In other words, this operation: - /// 1. adds useless dimensions (dimension of size 1), - /// 2. pads the other ones, and - /// 3. doesn't shuffle the dimensions - bool isLikePad(); - }]; - - let hasCanonicalizeMethod = 1; - - let hasFolder = 1; -} - -//===----------------------------------------------------------------------===// -// UnPackOp -//===----------------------------------------------------------------------===// - -def Tensor_UnPackOp : Tensor_RelayoutOp<"unpack"> { - let summary = "tensor unpack operation"; - let description = [{ - The "unpack" operation converts a source tensor of rank `n` with a tiled and - packed layout to a result tensor of rank `n - k`. - - `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with - which the last `k` source tensor dimensions are combined, where - `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`. - The order of the dimensions in `inner_dims_pos` matters: dimension - `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that - `outer_dims_perm` is not specified). - - `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes - correspond to the least significant ("inner") source tensor dimension sizes. - The behavior of this op is undefined if: - - `inner_tiles` do not exactly match with the corresponding source tensor - dimension sizes. - - Or, `inner_tiles[i]` does not divide the size of dimension - `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified) - evenly. - - `outer_dims_perm` (optional) specifies a permutation for the outer - dimensions. If specified, it must have `n - k` elements. 
If specified, this - permutation is applied before combining any dimensions. - - Example: - - ```mlir - // NCnc to NC: - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32] - into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32> - - // KCck to CK: - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest - : tensor<8x16x8x32xf32> -> tensor<128x256xf32> - ``` - }]; - let arguments = (ins AnyRankedTensor:$source, - AnyRankedTensor:$dest, - DefaultValuedOptionalAttr:$outer_dims_perm, - DenseI64ArrayAttr:$inner_dims_pos, - Variadic:$inner_tiles, - DenseI64ArrayAttr:$static_inner_tiles); - let results = (outs AnyRankedTensor:$result); - let assemblyFormat = [{ - $source - (`outer_dims_perm` `=` $outer_dims_perm^)? - `inner_dims_pos` `=` $inner_dims_pos - `inner_tiles` `=` - custom($inner_tiles, $static_inner_tiles) - `into` $dest attr-dict `:` type($source) `->` type($dest) - }]; - - let builders = [ - OpBuilder<(ins "Value":$source, "Value":$dest, - "ArrayRef":$innerDimsPos, - "ArrayRef":$innerTiles, - CArg<"ArrayRef", "{}">:$outerDimsPerm)> - ]; - - let extraClassDeclaration = commonExtraClassDeclaration # [{ - static Value createDestinationTensor(OpBuilder &b, Location loc, - Value source, ArrayRef innerTileSizes, - ArrayRef innerDimsPos, ArrayRef outerDimsPerm); - - /// Build and return a new UnPackOp that is a clone of the current UnPackOp - /// with (innerDimsPos, innerTiles) (resp. outerDimsPerm) permuted by - /// innerPermutation (resp. outerPermutation). - /// Asserts that: - /// - At least one of innerPermutation or outerPermutation is non-empty. - /// - If not empty, innerPermutation is a valid permutation of size - /// matching innerDimPos. - /// - If not empty, outerPermutation is a valid permutation of size - /// matching outerDimsPerm. - UnPackOp createTransposedClone(OpBuilder &b, - Location loc, - Value transposedSource, - ArrayRef innerPermutation, - ArrayRef outerPermutation); - - /// Check if this UnPackOp is like a simple unpad operation. - /// In other words, this operation: - /// 1. drops useless dimensions (dimensions of size 1), and - /// 2. reduces dimensions in place (i.e., no transpose). - bool isLikeUnPad(); - }]; - - let hasCanonicalizeMethod = 1; - - let hasFolder = 1; -} - //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h index ed1ec1e871482..83cc665b5a4fb 100644 --- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h @@ -42,25 +42,6 @@ FailureOr computeTransposedType(RankedTensorType rankedTensorType, ArrayRef transposeVector); -/// Shell function to compute the Destination Permutation of PackOp. -/// This function uses the helper function `computePackUnPackPerm` to get -/// the permutation vector. The only major difference between UnPack and Pack -/// is that packOp uses the destination rank whereas unpack uses the source rank. -SmallVector getPackInverseDestPerm(tensor::PackOp packOp); - -/// Shell function to compute the Source Permutation of unPackOp. -/// This function, like getPackInverseDestPerm, uses the helper function -/// `computePackUnPackPerm` to get the permutation vector. -/// The only major difference between UnPack and Pack is that packOp uses -/// the destination rank whereas unpack uses the source rank.
-SmallVector getUnPackInverseSrcPerm(tensor::UnPackOp unpackOp); - -/// Shell function to compute the Source rank permutation for unpackOp. -/// Unpack requires some packing metadata, so another overload is provided -/// where the metadata is passed by reference. -SmallVector getUnPackInverseSrcPerm(tensor::UnPackOp, - PackingMetadata &metadata); - /// A tensor.insert_slice is a cast-like operation if it merely rank-extends the /// source tensor or inserts the source tensor into a destination tensor with /// the same shape. diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 8188a000b09ff..b756a67f3ba7a 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -4061,8 +4061,6 @@ Speculation::Speculatability BatchMatmulOp::getSpeculatability() { // * the corresponding size from mixedTiles is still dynamic. // Otherwise, the original tile size is preserved. // Note - packed-type-dim and mixed-tile-size should always match! -// -// FIXME: Duplicates similar hook from TensorOps.cpp! static SmallVector getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy, SmallVector mixedTiles) { diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt index d9d09d6361a2f..5425615dac393 100644 --- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt @@ -16,7 +16,6 @@ add_mlir_dialect_library(MLIRTensorDialect DEPENDS MLIRTensorOpsIncGen - MLIRTensorInterfacesIncGen LINK_LIBS PUBLIC MLIRAffineDialect diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp index 002077753b132..8af087cbf0f61 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp @@ -63,7 +63,7 @@ void TensorDialect::initialize() { declarePromisedInterfaces(); declarePromisedInterface(); - declarePromisedInterfaces(); + declarePromisedInterfaces(); declarePromisedInterfaces(); } diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 00724121d373c..e741144647043 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -3873,916 +3873,6 @@ OpFoldResult SplatOp::fold(FoldAdaptor adaptor) { return SplatElementsAttr::get(getType(), {constOperand}); } -//===----------------------------------------------------------------------===// -// PackOp/UnPackOp Common -//===----------------------------------------------------------------------===// - -template -static LogicalResult -reifyResultShapesImpl(OpTy op, OpBuilder &builder, - ReifiedRankedShapedTypeDims &reifiedReturnShapes) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - int64_t destRank = op.getDestRank(); - reifiedReturnShapes.resize(1, SmallVector(destRank)); - reifiedReturnShapes[0] = - tensor::getMixedSizes(builder, op.getLoc(), op.getDest()); - return success(); -} - -template -static DenseMap getDimAndTileMappingImpl(OpTy op) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - DenseMap dimAndTileMapping; - ArrayRef dimsToTile = op.getInnerDimsPos(); - SmallVector tiles = op.getMixedTiles(); - assert(tiles.size() == dimsToTile.size() && - "tiles must match indices of dimension to block"); - // Bind dimension `i` to its tile factor.
- for (auto i : llvm::seq(0, dimsToTile.size())) - dimAndTileMapping[dimsToTile[i]] = tiles[i]; - return dimAndTileMapping; -} - -template -static SmallVector getMixedTilesImpl(OpTy op) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - Builder builder(op); - SmallVector mixedInnerTiles; - unsigned dynamicValIndex = 0; - for (int64_t staticTile : op.getStaticInnerTiles()) { - if (!ShapedType::isDynamic(staticTile)) - mixedInnerTiles.push_back(builder.getI64IntegerAttr(staticTile)); - else - mixedInnerTiles.push_back(op.getInnerTiles()[dynamicValIndex++]); - } - return mixedInnerTiles; -} - -template -static SmallVector getStaticTilesImpl(OpTy op) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - SmallVector dynamicTiles; - SmallVector staticTiles; - dispatchIndexOpFoldResults(op.getMixedTiles(), dynamicTiles, staticTiles); - return staticTiles; -} - -/// Returns true if `dimsPos` is invalid. It is invalid when: -/// a) It contains duplicates. -/// b) At least one dimension is out of bound (a valid `dimPos` is >= 0 and < rank). -/// c) The number of elements in `dimsPos` is greater than `rank`. -static bool isInvalidPackingPosSpecification(ArrayRef dimsPos, - size_t rank) { - size_t dimsPosSize = dimsPos.size(); - if (dimsPosSize > rank) - return true; - DenseSet uniqued; - for (int64_t dim : dimsPos) - uniqued.insert(dim); - if (dimsPosSize != uniqued.size()) - return true; - return llvm::any_of(dimsPos, [rank](int64_t dimPos) { - return dimPos < 0 || dimPos >= static_cast(rank); - }); -} - -/// Returns true if each dimension of `sourceShape` is smaller than or equal to -/// the corresponding dimension of `limitShape`; dynamic dimensions are always -/// considered in bound. -static bool areAllInBound(ArrayRef sourceShape, - ArrayRef limitShape) { - assert( - sourceShape.size() == limitShape.size() && - "expected source shape rank, and limit of the shape to have same rank"); - return llvm::all_of( - llvm::zip(sourceShape, limitShape), [](std::tuple it) { - int64_t sourceExtent = std::get<0>(it); - int64_t limit = std::get<1>(it); - return ShapedType::isDynamic(sourceExtent) || - ShapedType::isDynamic(limit) || sourceExtent <= limit; - }); -} - -template -static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - Operation *op = packOrUnPack.getOperation(); - - // Return true if we have a zero-value tile. - auto hasZeros = [&](ArrayRef tiles) { - return llvm::any_of( - tiles, [](OpFoldResult tile) { return isConstantIntValue(tile, 0); }); - }; - - // Verify tiles. Do not allow zero tiles. - SmallVector mixedTiles = packOrUnPack.getMixedTiles(); - if (hasZeros(mixedTiles)) - return op->emitError("invalid zero tile factor"); - - // Verify inner_dims_pos and outer_dims_perm. - RankedTensorType unpackedType = (std::is_same::value) - ?
packOrUnPack.getSourceType() - : packOrUnPack.getDestType(); - size_t unpackedRank = unpackedType.getRank(); - ArrayRef innerDimsPos = packOrUnPack.getInnerDimsPos(); - ArrayRef outerDimPerm = packOrUnPack.getOuterDimsPerm(); - if (isInvalidPackingPosSpecification(innerDimsPos, unpackedRank)) - return op->emitError("invalid inner_dims_pos vector"); - if (isInvalidPackingPosSpecification(outerDimPerm, unpackedRank)) - return op->emitError("invalid outer_dims_perm vector"); - if (!outerDimPerm.empty() && outerDimPerm.size() != unpackedRank) - return op->emitError("outer_dims_perm must be a permutation or empty"); - - // Tiling factors must be less than or equal to the input rank for pack (or - // output rank for unpack), and must match the number of `inner_dims_pos`. - if (mixedTiles.size() > unpackedRank) { - return op->emitError("tiling factors must be less than or equal to the " - "input rank for pack or output rank for unpack"); - } - if (mixedTiles.size() != innerDimsPos.size()) { - return op->emitError( - "tiling factors must equal the number of dimensions to tile"); - } - - ShapedType packedType = (std::is_same::value) - ? packOrUnPack.getDestType() - : packOrUnPack.getSourceType(); - size_t packedRank = packedType.getRank(); - // Require output rank to match input rank + number of blocking factors. - size_t expectedPackedRank = unpackedRank + mixedTiles.size(); - if (expectedPackedRank != packedRank) { - return op->emitError( - "packed rank != (unpacked rank + num tiling factors), got ") - << packedRank << " != " << expectedPackedRank; - } - - // Verify result shape is greater than the minimum expected - // by the pack operation, and that the output shape - // represents full tiles. - RankedTensorType expectedPackedType = PackOp::inferPackedType( - unpackedType, packOrUnPack.getStaticTiles(), innerDimsPos, outerDimPerm); - if (!areAllInBound(expectedPackedType.getShape(), packedType.getShape())) { - return op->emitError("the shape of output is not large enough to hold the " - "packed data. Expected at least ") - << expectedPackedType << ", got " << packedType; - } - if (!llvm::all_of( - llvm::zip(packedType.getShape().take_back(mixedTiles.size()), - mixedTiles), - [](std::tuple it) { - int64_t shape = std::get<0>(it); - if (Attribute attr = - llvm::dyn_cast_if_present(std::get<1>(it))) { - IntegerAttr intAttr = dyn_cast_or_null(attr); - int64_t staticTileSize = intAttr.getValue().getSExtValue(); - return shape == staticTileSize; - } - return ShapedType::isDynamic(shape); - })) { - return op->emitError("mismatch in inner tile sizes specified and shaped of " - "tiled dimension in the packed type"); - } - return success(); -} - -namespace { -/// Subset of PackOp/UnPackOp fields used to compute the result of applying -/// various permutations to the op. -// TODO: Add linalg.transpose + pack/unpack folding patterns that just reuse -// these. These may or may not become true foldings / canonicalizations -// depending on how aggressive we want to be in automatically folding -// transposes. 
-struct PackOrUnPackTransposeResult { - SmallVector innerDimsPos; - SmallVector innerTiles; - SmallVector outerDimsPerm; -}; -} // namespace - -template -static PackOrUnPackTransposeResult -commonPermutationOfPackAndUnPackOp(OpTy packOrUnPackOp, - ArrayRef innerPermutation, - ArrayRef outerPermutation) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - assert((!innerPermutation.empty() || !outerPermutation.empty()) && - "some permutation must be non-empty"); - PackOrUnPackTransposeResult metadata; - metadata.innerDimsPos = - SmallVector(packOrUnPackOp.getInnerDimsPos()); - metadata.innerTiles = - SmallVector(packOrUnPackOp.getMixedTiles()); - int64_t numOuterDims = std::is_same::value - ? packOrUnPackOp.getSourceRank() - : packOrUnPackOp.getDestRank(); - metadata.outerDimsPerm = - packOrUnPackOp.getOuterDimsPerm().empty() - ? llvm::to_vector(llvm::seq(0, numOuterDims)) - : SmallVector(packOrUnPackOp.getOuterDimsPerm()); - if (!innerPermutation.empty()) { - assert(innerPermutation.size() == metadata.innerDimsPos.size() && - isPermutationVector(innerPermutation) && - "invalid inner permutation"); - applyPermutationToVector(metadata.innerDimsPos, innerPermutation); - applyPermutationToVector(metadata.innerTiles, innerPermutation); - } - if (!outerPermutation.empty()) { - assert(outerPermutation.size() == metadata.outerDimsPerm.size() && - isPermutationVector(outerPermutation) && - "invalid outer permutation"); - applyPermutationToVector(metadata.outerDimsPerm, outerPermutation); - } - return metadata; -} - -//===----------------------------------------------------------------------===// -// PackOp -//===----------------------------------------------------------------------===// - -void PackOp::getAsmResultNames(function_ref setNameFn) { - setNameFn(getResult(), "pack"); -} - -void PackOp::build(OpBuilder &builder, OperationState &state, Value source, - Value dest, ArrayRef innerDimsPos, - ArrayRef innerTiles, - std::optional paddingValue, - ArrayRef outerDimsPerm) { - assert(innerDimsPos.size() == innerTiles.size() && - "number of tile sizes specified must match the specified number of " - "original dimensions to be tiled"); - SmallVector staticTileSizes; - SmallVector dynamicTileSizes; - dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes); - build(builder, state, dest.getType(), source, dest, - paddingValue ? *paddingValue : nullptr, - outerDimsPerm.empty() ? 
nullptr - : builder.getDenseI64ArrayAttr(outerDimsPerm), - builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes, - builder.getDenseI64ArrayAttr(staticTileSizes)); -} - -LogicalResult -PackOp::reifyResultShapes(OpBuilder &builder, - ReifiedRankedShapedTypeDims &reifiedReturnShapes) { - return reifyResultShapesImpl(*this, builder, reifiedReturnShapes); -} - -DenseMap PackOp::getDimAndTileMapping() { - return getDimAndTileMappingImpl(*this); -} - -SmallVector PackOp::getMixedTiles() { - return getMixedTilesImpl(*this); -} - -SmallVector PackOp::getStaticTiles() { - return getStaticTilesImpl(*this); -} - -ArrayRef PackOp::getAllOuterDims() { - ShapedType inputType = getSourceType(); - int64_t inputRank = inputType.getRank(); - return getDestType().getShape().take_front(inputRank); -} - -SmallVector PackOp::getTiledOuterDims() { - auto innerDimsPos = getInnerDimsPos(); - auto packedShape = getDestType().getShape(); - SmallVector res; - - for (auto index : innerDimsPos) - res.push_back(packedShape[index]); - - return res; -} - -bool PackOp::requirePaddingValue(ArrayRef inputShape, - ArrayRef innerDimsPos, - ArrayRef outputShape, - ArrayRef outerDimsPerm, - ArrayRef innerTiles) { - SmallVector outputTileSizes( - outputShape.take_front(inputShape.size())); - if (!outerDimsPerm.empty()) { - assert(outerDimsPerm.size() == outputTileSizes.size() && - "expected output and outer_dims_perm to have same size"); - applyPermutationToVector(outputTileSizes, - invertPermutationVector(outerDimsPerm)); - } - for (auto [pos, tileSize] : llvm::zip_equal(innerDimsPos, innerTiles)) { - if (ShapedType::isDynamic(inputShape[pos])) - continue; - std::optional constantTile = getConstantIntValue(tileSize); - - if (!constantTile) { - if (!ShapedType::isDynamic(outputTileSizes[pos]) && - (inputShape[pos] % outputTileSizes[pos] != 0)) - return true; - } else if (inputShape[pos] % (*constantTile) != 0) { - return true; - } - } - return false; -} - -LogicalResult PackOp::verify() { - if (failed(commonVerifierPackAndUnPackOp(*this))) - return failure(); - - // Verify padding value, and bail out if the tile does not divide the - // dimension fully. In the case of dynamic tile factors or dimensions, having - // a partial tile is undefined behavior. - auto paddingValue = getPaddingValue(); - if (paddingValue && - paddingValue.getType() != getSourceType().getElementType()) { - return emitOpError("expected padding_value has ") - << getSourceType().getElementType() - << " but got: " << paddingValue.getType(); - } - - if (!paddingValue && - requirePaddingValue(getSourceType().getShape(), getInnerDimsPos(), - getDestType().getShape(), getOuterDimsPerm(), - getMixedTiles())) { - return emitOpError( - "invalid tile factor or output size provided. Only full tiles are " - "supported when padding_value is not set"); - } - return success(); -} - -/// Converts OpFoldResults to int64_t shape entries, unconditionally mapping all -/// Value's to kDynamic, even if they are arith.constant values. -static SmallVector -asShapeWithAnyValueAsDynamic(ArrayRef ofrs) { - SmallVector result; - for (auto o : ofrs) { - // Have to do this first, as getConstantIntValue special-cases constants. - if (llvm::dyn_cast_if_present(o)) - result.push_back(ShapedType::kDynamic); - else - result.push_back(getConstantIntValue(o).value_or(ShapedType::kDynamic)); - } - return result; -} - -/// Helper for PackOp::{getResultShape,inferPackedType}. Returns the shape of -/// the packed type. 
Having a shared helper helps implement these two methods in -/// a way that ensures that they agree on which dimensions are dynamic. -static SmallVector getPackOpResultTypeShape( - ArrayRef sourceShape, ArrayRef innerTileSizes, - ArrayRef innerDimsPos, ArrayRef outerDimsPerm) { - SmallVector resultShape = llvm::to_vector(sourceShape); - for (auto tiledDim : llvm::enumerate(llvm::to_vector(innerDimsPos))) { - if (ShapedType::isDynamic(resultShape[tiledDim.value()])) - continue; - if (ShapedType::isDynamic(innerTileSizes[tiledDim.index()])) { - resultShape[tiledDim.value()] = ShapedType::kDynamic; - continue; - } - resultShape[tiledDim.value()] = divideCeilSigned( - resultShape[tiledDim.value()], innerTileSizes[tiledDim.index()]); - } - - // Swap tile loops if outer_dims_perm is available. - if (!outerDimsPerm.empty()) - applyPermutationToVector(resultShape, outerDimsPerm); - - // Append the inner tile dimensions. - resultShape.append(innerTileSizes.begin(), innerTileSizes.end()); - return resultShape; -} - -SmallVector PackOp::getResultShape( - OpBuilder &builder, Location loc, ArrayRef sourceDims, - ArrayRef innerTileSizes, ArrayRef innerDimsPos, - ArrayRef outerDimsPerm) { - SmallVector resultDims = llvm::to_vector(sourceDims); - - AffineExpr s0, s1; - bindSymbols(builder.getContext(), s0, s1); - AffineExpr ceilDivExpr = s0.ceilDiv(s1); - for (auto tiledDim : llvm::enumerate(llvm::to_vector(innerDimsPos))) { - resultDims[tiledDim.value()] = affine::makeComposedFoldedAffineApply( - builder, loc, ceilDivExpr, - {resultDims[tiledDim.value()], innerTileSizes[tiledDim.index()]}); - } - if (!outerDimsPerm.empty()) - applyPermutationToVector(resultDims, outerDimsPerm); - resultDims.append(innerTileSizes.begin(), innerTileSizes.end()); - - SmallVector resultTypeShape = - getPackOpResultTypeShape(asShapeWithAnyValueAsDynamic(sourceDims), - asShapeWithAnyValueAsDynamic(innerTileSizes), - innerDimsPos, outerDimsPerm); - - // Fix-up `resultDims` to ensure that they are Value's if and only if the - // result type shape says it's a dynamic dim. This is needed as callers may - // use dispatchIndexOpFoldResults on the result, and rely on exact number of - // dynamic dims returned by that. - for (unsigned i = 0; i < resultDims.size(); ++i) { - if (!ShapedType::isDynamic(resultTypeShape[i])) - continue; - resultDims[i] = - getValueOrCreateConstantIndexOp(builder, loc, resultDims[i]); - } - - return resultDims; -} - -/// Get the expected packed type based on source type, tile factors, position of -/// the inner tiles and permutation of the outer tiled loop. 
-RankedTensorType PackOp::inferPackedType(RankedTensorType sourceType, - ArrayRef innerTileSizes, - ArrayRef innerDimsPos, - ArrayRef outerDimsPerm) { - SmallVector resultShape = getPackOpResultTypeShape( - sourceType.getShape(), innerTileSizes, innerDimsPos, outerDimsPerm); - return RankedTensorType::get(resultShape, sourceType.getElementType()); -} - -Value PackOp::createDestinationTensor(OpBuilder &b, Location loc, Value source, - ArrayRef innerTileSizes, - ArrayRef innerDimsPos, - ArrayRef outerDimsPerm) { - AffineExpr dim0, dim1; - bindDims(b.getContext(), dim0, dim1); - auto ceilDiv = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult { - return affine::makeComposedFoldedAffineApply(b, loc, dim0.ceilDiv(dim1), - {v1, v2}); - }; - - SmallVector mixedSizes; - for (auto [index, value] : llvm::enumerate( - llvm::cast(source.getType()).getShape())) { - if (ShapedType::isDynamic(value)) - mixedSizes.push_back(b.create(loc, source, index).getResult()); - else - mixedSizes.push_back(b.getIndexAttr(value)); - } - for (auto it : llvm::zip(innerDimsPos, innerTileSizes)) { - int64_t dimPos = std::get<0>(it); - OpFoldResult tileSize = std::get<1>(it); - mixedSizes[dimPos] = ceilDiv(mixedSizes[dimPos], tileSize); - } - if (!outerDimsPerm.empty()) - applyPermutationToVector(mixedSizes, outerDimsPerm); - - mixedSizes.append(innerTileSizes.begin(), innerTileSizes.end()); - auto elemType = llvm::cast(source.getType()).getElementType(); - return b.create(loc, mixedSizes, elemType); -} - -PackOp PackOp::createTransposedClone(OpBuilder &b, Location loc, - ArrayRef innerPermutation, - ArrayRef outerPermutation) { - PackOrUnPackTransposeResult metadata = commonPermutationOfPackAndUnPackOp( - *this, innerPermutation, outerPermutation); - Value transposedDest = - createDestinationTensor(b, loc, getSource(), metadata.innerTiles, - metadata.innerDimsPos, metadata.outerDimsPerm); - return b.create(loc, getSource(), transposedDest, - metadata.innerDimsPos, metadata.innerTiles, - getPaddingValue(), metadata.outerDimsPerm); -} - -/// Returns true if the tiles and the tiled dims are constant. -template -bool areTilesAndTiledDimsAllConstant(OpTy op) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - ShapedType packedType = (std::is_same::value) - ? op.getDestType() - : op.getSourceType(); - SmallVector mixedTiles = op.getMixedTiles(); - for (auto [dimDest, tile] : llvm::zip( - packedType.getShape().take_back(mixedTiles.size()), mixedTiles)) { - std::optional constTileSize = getConstantIntValue(tile); - if (!constTileSize || ShapedType::isDynamic(dimDest)) - return false; - } - return true; -} - -Speculation::Speculatability PackOp::getSpeculatability() { - if (getPaddingValue()) - return Speculation::Speculatable; - - // The verifier rejects already operations if we can statically prove that the - // sizes of the tiles do not divide perfectly the dimension; thus, check only - // to have constant tiles and tiled inner dimensions. - if (!areTilesAndTiledDimsAllConstant(*this)) - return Speculation::NotSpeculatable; - - return Speculation::Speculatable; -} - -// Return true if `inner_dims_pos` and `outer_dims_perm` target the same -// dimensions for pack and unpack. -static bool hasSameInnerOuterAttribute(PackOp packOp, UnPackOp unPackOp) { - if (packOp.getInnerDimsPos() != unPackOp.getInnerDimsPos()) - return false; - if (packOp.getOuterDimsPerm() == unPackOp.getOuterDimsPerm()) - return true; - // Outer dims permutation is optional. 
- // To compare unbalanced pack-unpack pair, treat no permutation as equal to - // identity permutation. - return isIdentityPermutation(packOp.getOuterDimsPerm()) && - isIdentityPermutation(unPackOp.getOuterDimsPerm()); -} - -// Return true if pack and unpack have the same tiles. -// Same SSA values or same integer constants. -static bool haveSameTiles(PackOp packOp, UnPackOp unPackOp) { - auto packTiles = packOp.getMixedTiles(); - auto unPackTiles = unPackOp.getMixedTiles(); - if (packTiles.size() != unPackTiles.size()) - return false; - for (size_t i = 0, e = packTiles.size(); i < e; i++) { - if (!isEqualConstantIntOrValue(packTiles[i], unPackTiles[i])) - return false; - } - return true; -} - -/// Returns true if the pack op does not need a padding value. -static bool paddingIsNotNeeded(PackOp op) { - auto srcType = op.getSourceType(); - if (llvm::any_of(op.getInnerDimsPos(), - [&](int64_t pos) { return srcType.isDynamicDim(pos); })) - return false; - if (ShapedType::isDynamicShape(op.getStaticInnerTiles())) - return false; - return !PackOp::requirePaddingValue( - srcType.getShape(), op.getInnerDimsPos(), op.getDestType().getShape(), - op.getOuterDimsPerm(), op.getMixedTiles()); -} - -/// Returns true if the `srcShape` or `destShape` is different from the one in -/// `packOp` and populates each with the inferred static shape. -static bool inferStaticShape(PackOp packOp, SmallVectorImpl &srcShape, - SmallVectorImpl &destShape) { - bool changeNeeded = false; - srcShape.assign(packOp.getSourceType().getShape().begin(), - packOp.getSourceType().getShape().end()); - destShape.assign(packOp.getDestType().getShape().begin(), - packOp.getDestType().getShape().end()); - llvm::SmallSetVector innerDims; - innerDims.insert(packOp.getInnerDimsPos().begin(), - packOp.getInnerDimsPos().end()); - SmallVector inverseOuterDimsPerm; - if (!packOp.getOuterDimsPerm().empty()) - inverseOuterDimsPerm = invertPermutationVector(packOp.getOuterDimsPerm()); - int srcRank = packOp.getSourceRank(); - for (auto i : llvm::seq(0, srcRank)) { - if (innerDims.contains(i)) - continue; - int64_t srcPos = i; - int64_t destPos = i; - if (!inverseOuterDimsPerm.empty()) - destPos = inverseOuterDimsPerm[srcPos]; - if (ShapedType::isDynamic(srcShape[srcPos]) == - ShapedType::isDynamic(destShape[destPos])) { - continue; - } - int64_t size = srcShape[srcPos]; - if (ShapedType::isDynamic(size)) - size = destShape[destPos]; - srcShape[srcPos] = size; - destShape[destPos] = size; - changeNeeded = true; - } - return changeNeeded; -} - -LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) { - // Fold an pack(unpack(x)) to x. - if (auto unPackOp = packOp.getSource().getDefiningOp()) { - if (unPackOp.getSourceType() != packOp.getDestType()) - return failure(); - if (packOp.getPaddingValue() || - !hasSameInnerOuterAttribute(packOp, unPackOp) || - !haveSameTiles(packOp, unPackOp)) - return failure(); - rewriter.replaceOp(packOp, unPackOp.getSource()); - return success(); - } - - // Fold optional PaddingValue operand away if padding is not needed. - if (packOp.getPaddingValue() && paddingIsNotNeeded(packOp)) { - rewriter.startOpModification(packOp); - packOp.getPaddingValueMutable().clear(); - rewriter.finalizeOpModification(packOp); - return success(); - } - - // Insert tensor.cast ops if static shape inference is available.. 
- SmallVector srcShape, destShape; - if (inferStaticShape(packOp, srcShape, destShape)) { - Location loc = packOp.getLoc(); - Value source = packOp.getSource(); - if (srcShape != packOp.getSourceType().getShape()) { - auto newSrcType = packOp.getSourceType().clone(srcShape); - source = - rewriter.create(loc, newSrcType, packOp.getSource()); - } - Value dest = packOp.getDest(); - RankedTensorType originalResultType = packOp.getDestType(); - bool needUpdateDestType = (destShape != originalResultType.getShape()); - if (needUpdateDestType) { - auto newDestType = packOp.getDestType().clone(destShape); - dest = - rewriter.create(loc, newDestType, packOp.getDest()); - } - rewriter.modifyOpInPlace(packOp, [&] { - packOp.getSourceMutable().assign(source); - packOp.getDestMutable().assign(dest); - packOp.getResult().setType(cast(dest.getType())); - }); - // Insert a cast if needed - if (needUpdateDestType) { - rewriter.setInsertionPointAfter(packOp); - auto castOp = - rewriter.create(loc, originalResultType, packOp); - rewriter.replaceAllUsesExcept(packOp, castOp, castOp); - } - return success(); - } - - return failure(); -} - -template -static bool isLikePadUnPad(PackOrUnpackOp packOp, - RankedTensorType packedTensorType) { - static_assert(std::is_same::value || - std::is_same::value, - "Function meant for pack/unpack"); - // This is a pad if packing only adds ones and we don't transpose dimensions. - - // Check that we are not transposing any dimensions. - ArrayRef innerDimsPos = packOp.getInnerDimsPos(); - int64_t numPackedDims = innerDimsPos.size(); - auto orderedDims = llvm::to_vector<4>(llvm::seq(0, numPackedDims)); - if (orderedDims != innerDimsPos) { - // Dimensions don't happen in order. - return false; - } - - ArrayRef packedShape = packedTensorType.getShape(); - int64_t packedRank = packedTensorType.getRank(); - // At this point we know that we are taking numPackedDims outer - // dimensions and pushing them all the way as the inner most dimensions. - // What's left on the outer most dimensions is, in this order: - // - the factor of the packed dimensions, then - // - the untouched dimensions - // This shifting inward of dimensions is a no-op (as opposed to a transpose) - // if all the dimensions that bubble outerward are ones. - // Therefore check that all the dimensions but the numPackedDims inner most - // ones are ones. 
- return llvm::all_of( - llvm::seq(0, packedRank - numPackedDims), - [&packedShape](int64_t i) { return packedShape[i] == 1; }); -} - -bool PackOp::isLikePad() { - auto packedTensorType = - llvm::cast((*this)->getResultTypes().front()); - return isLikePadUnPad(*this, packedTensorType); -} - -OpFoldResult PackOp::fold(FoldAdaptor adaptor) { - std::optional paddingValue; - if (auto pad = adaptor.getPaddingValue()) - paddingValue = pad; - if (OpFoldResult reshapedSource = reshapeConstantSource( - llvm::dyn_cast_if_present(adaptor.getSource()), - getDestType(), paddingValue)) - return reshapedSource; - return {}; -} - -//===----------------------------------------------------------------------===// -// UnPackOp -//===----------------------------------------------------------------------===// - -void UnPackOp::getAsmResultNames( - function_ref setNameFn) { - setNameFn(getResult(), "unpack"); -} - -LogicalResult -UnPackOp::reifyResultShapes(OpBuilder &builder, - ReifiedRankedShapedTypeDims &reifiedReturnShapes) { - return reifyResultShapesImpl(*this, builder, reifiedReturnShapes); -} - -DenseMap UnPackOp::getDimAndTileMapping() { - return getDimAndTileMappingImpl(*this); -} - -SmallVector UnPackOp::getMixedTiles() { - return getMixedTilesImpl(*this); -} - -SmallVector UnPackOp::getStaticTiles() { - return getStaticTilesImpl(*this); -} - -ArrayRef UnPackOp::getAllOuterDims() { - ShapedType destType = getDestType(); - int64_t destRank = destType.getRank(); - return getSourceType().getShape().take_front(destRank); -} - -SmallVector UnPackOp::getTiledOuterDims() { - auto innerDimsPos = getInnerDimsPos(); - auto packedShape = getSourceType().getShape(); - SmallVector res; - - for (auto index : innerDimsPos) - res.push_back(packedShape[index]); - - return res; -} - -LogicalResult UnPackOp::verify() { - return commonVerifierPackAndUnPackOp(*this); -} - -Speculation::Speculatability UnPackOp::getSpeculatability() { - // See PackOp::getSpeculatability. - if (!areTilesAndTiledDimsAllConstant(*this)) - return Speculation::NotSpeculatable; - - return Speculation::Speculatable; -} - -void UnPackOp::build(OpBuilder &builder, OperationState &state, Value source, - Value dest, ArrayRef innerDimsPos, - ArrayRef innerTiles, - ArrayRef outerDimsPerm) { - assert(innerDimsPos.size() == innerTiles.size() && - "number of tile sizes specified must match the specified number of " - "original dimensions to be tiled"); - SmallVector staticTileSizes; - SmallVector dynamicTileSizes; - dispatchIndexOpFoldResults(innerTiles, dynamicTileSizes, staticTileSizes); - build(builder, state, dest.getType(), source, dest, - outerDimsPerm.empty() ? 
nullptr - : builder.getDenseI64ArrayAttr(outerDimsPerm), - builder.getDenseI64ArrayAttr(innerDimsPos), dynamicTileSizes, - builder.getDenseI64ArrayAttr(staticTileSizes)); -} - -Value UnPackOp::createDestinationTensor(OpBuilder &b, Location loc, - Value source, - ArrayRef innerTileSizes, - ArrayRef innerDimsPos, - ArrayRef outerDimsPerm) { - AffineExpr sym0, sym1; - bindSymbols(b.getContext(), sym0, sym1); - auto dimMul = [&](OpFoldResult v1, OpFoldResult v2) -> OpFoldResult { - return affine::makeComposedFoldedAffineApply(b, loc, sym0 * sym1, {v1, v2}); - }; - - SmallVector mixedSizes; - auto srcType = llvm::cast(source.getType()); - for (auto i : - llvm::seq(0, srcType.getRank() - innerTileSizes.size())) { - if (srcType.isDynamicDim(i)) - mixedSizes.push_back(b.create(loc, source, i).getResult()); - else - mixedSizes.push_back(b.getIndexAttr(srcType.getDimSize(i))); - } - if (!outerDimsPerm.empty()) { - applyPermutationToVector( - mixedSizes, invertPermutationVector(outerDimsPerm)); - } - - for (auto [dimPos, tileSize] : llvm::zip_equal(innerDimsPos, innerTileSizes)) - mixedSizes[dimPos] = dimMul(mixedSizes[dimPos], tileSize); - - auto elemType = srcType.getElementType(); - return b.create(loc, mixedSizes, elemType); -} - -UnPackOp UnPackOp::createTransposedClone(OpBuilder &b, Location loc, - Value transposedSource, - ArrayRef innerPermutation, - ArrayRef outerPermutation) { - PackOrUnPackTransposeResult metadata = commonPermutationOfPackAndUnPackOp( - *this, innerPermutation, outerPermutation); - return b.create(loc, transposedSource, getDest(), - metadata.innerDimsPos, metadata.innerTiles, - metadata.outerDimsPerm); -} - -/// Returns true if the `srcShape` or `destShape` is different from the one in -/// `op` and populates each with the inferred static shape. 
-static bool inferStaticShape(UnPackOp op, SmallVectorImpl &srcShape, - SmallVectorImpl &destShape) { - bool changeNeeded = false; - srcShape.assign(op.getSourceType().getShape().begin(), - op.getSourceType().getShape().end()); - destShape.assign(op.getDestType().getShape().begin(), - op.getDestType().getShape().end()); - llvm::SmallSetVector innerDims; - innerDims.insert(op.getInnerDimsPos().begin(), op.getInnerDimsPos().end()); - SmallVector inverseOuterDimsPerm; - if (!op.getOuterDimsPerm().empty()) - inverseOuterDimsPerm = invertPermutationVector(op.getOuterDimsPerm()); - int destRank = op.getDestRank(); - for (auto i : llvm::seq(0, destRank)) { - if (innerDims.contains(i)) - continue; - int64_t srcPos = i; - int64_t destPos = i; - if (!inverseOuterDimsPerm.empty()) - srcPos = inverseOuterDimsPerm[destPos]; - if (ShapedType::isDynamic(srcShape[srcPos]) == - ShapedType::isDynamic(destShape[destPos])) { - continue; - } - int64_t size = srcShape[srcPos]; - if (ShapedType::isDynamic(size)) - size = destShape[destPos]; - srcShape[srcPos] = size; - destShape[destPos] = size; - changeNeeded = true; - } - return changeNeeded; -} - -LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp, - PatternRewriter &rewriter) { - /// unpack(pack(x)) -> x - if (PackOp packOp = unPackOp.getSource().getDefiningOp()) { - if (packOp.getSourceType() != unPackOp.getDestType()) - return failure(); - if (packOp.getPaddingValue() || - !hasSameInnerOuterAttribute(packOp, unPackOp) || - !haveSameTiles(packOp, unPackOp)) - return failure(); - rewriter.replaceOp(unPackOp, packOp.getSource()); - return success(); - } - /// unpack(destinationStyleOp(x)) -> unpack(x) - if (auto dstStyleOp = - unPackOp.getDest().getDefiningOp()) { - auto destValue = cast(unPackOp.getDest()); - Value newDest = dstStyleOp.getDpsInits()[destValue.getResultNumber()]; - rewriter.modifyOpInPlace(unPackOp, - [&]() { unPackOp.setDpsInitOperand(0, newDest); }); - return success(); - } - - // Insert tensor.cast ops if static shape inference is available.. - SmallVector srcShape, destShape; - if (inferStaticShape(unPackOp, srcShape, destShape)) { - Location loc = unPackOp.getLoc(); - Value source = unPackOp.getSource(); - if (srcShape != unPackOp.getSourceType().getShape()) { - auto newSrcType = unPackOp.getSourceType().clone(srcShape); - source = rewriter.create(loc, newSrcType, - unPackOp.getSource()); - } - Value dest = unPackOp.getDest(); - if (destShape != unPackOp.getDestType().getShape()) { - auto newDestType = unPackOp.getDestType().clone(destShape); - dest = - rewriter.create(loc, newDestType, unPackOp.getDest()); - } - Value newOp = rewriter.create( - loc, source, dest, unPackOp.getInnerDimsPos(), unPackOp.getMixedTiles(), - unPackOp.getOuterDimsPerm()); - rewriter.replaceOpWithNewOp( - unPackOp, unPackOp.getResult().getType(), newOp); - return success(); - } - - return failure(); -} - -bool UnPackOp::isLikeUnPad() { - RankedTensorType packedTensorType = getSourceType(); - return isLikePadUnPad(*this, packedTensorType); -} - -OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) { - if (OpFoldResult reshapedSource = reshapeConstantSource( - llvm::dyn_cast_if_present(adaptor.getSource()), - getResult().getType())) - return reshapedSource; - return {}; -} - //===----------------------------------------------------------------------===// // Common Canonicalizers and Folders. 
//===----------------------------------------------------------------------===// @@ -4797,151 +3887,6 @@ bool foldTensorCastPrecondition(DestinationStyleOpInterface op) { return hasFoldableTensorCastOperand(op); } -// Given the (potentially) updated packed type, `newPackedTy`, generates an -// updated mixed-tile-sizes attribute. A tile size is updated only -// when: -// * a dim from newPackedTy is static, and -// * the corresponding size from mixedTiles is still dynamic. -// Otherwise, the original tile size is preserved. -// Note - packed-type-dim and mixed-tile-size should always match! -static SmallVector -getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy, - SmallVector mixedTiles) { - SmallVector newMixedTileSizes; - for (auto it : llvm::zip(cast(newPackedTy) - .getShape() - .take_back(mixedTiles.size()), - mixedTiles)) { - int64_t shape = std::get<0>(it); - if (shape == ShapedType::kDynamic) { - newMixedTileSizes.push_back(std::get<1>(it)); - continue; - } - - // If the current result dim is static, update the dynamic mixed-size - // (provided the original value is dynamic). - OpFoldResult tile = std::get<1>(it); - if (Attribute attr = llvm::dyn_cast_if_present(tile)) { - // Already a constant - newMixedTileSizes.push_back(tile); - } else { - assert(getConstantIntValue(tile).value() == shape && - "tile size and dim size don't match!"); - newMixedTileSizes.push_back( - (rewriter.getIntegerAttr(rewriter.getIndexType(), shape))); - } - } - - return newMixedTileSizes; -} - -/// Folds a tensor.cast op into a consuming PackOp op if the -/// `tensor.cast` has source that is more static than the consuming op. -/// -/// Example: -/// ```mlir -/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor -/// %2 = tensor.pack %1 ... : tensor ... -/// ``` -/// -/// folds into: -/// -/// ```mlir -/// %2 = tensor.pack %0 ... : tensor<8x16xf32> ... -/// ``` -struct FoldTensorCastPackOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PackOp op, - PatternRewriter &rewriter) const override { - if (!foldTensorCastPrecondition(op)) - return failure(); - - SmallVector newResultTypes(op->getResultTypes()); - SmallVector newOperands = - getUpdatedOperandsAfterCastOpFolding(op, newResultTypes); - - // Get the updated mixed-tile-sizes attribute. - SmallVector newMixedTileSizes = - getNewMixedTileSizes(rewriter, newResultTypes[0], op.getMixedTiles()); - - // Clone op. - // TODO: Strictly speaking, discardable attributes should be _discarded_ at - // this point. However, in practice, we use them for things that we'd like - // to preserve. Implement a better abstraction. - PackOp newOp = rewriter.create( - op.getLoc(), newOperands[0], newOperands[1], op.getInnerDimsPos(), - newMixedTileSizes, op.getPaddingValue(), op.getOuterDimsPerm()); - newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary()); - - // Replace op. - Value oldResult = op.getResult(); - Value newResult = newOp.getResult(); - Value replacement = (newResult.getType() != oldResult.getType()) - ? rewriter.create( - op->getLoc(), oldResult.getType(), newResult) - : newResult; - - rewriter.replaceOp(op, {replacement}); - - return success(); - } -}; - -/// Folds a tensor.cast op into a consuming UnPackOp op if the -/// `tensor.cast` has source that is more static than the consuming op. -/// -/// Example: -/// ```mlir -/// %1 = tensor.cast %0 : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> -/// %2 = tensor.unpack %1 ... 
: tensor<1x1x?x1xi32> -> tensor<7x?xi32> -/// ``` -/// -/// folds into: -/// -/// ```mlir -/// %2 = tensor.unpack %0 ... tensor<1x1x8x1xi32> -> tensor<7x?xi32> -/// ``` -struct FoldTensorCastUnPackOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(UnPackOp op, - PatternRewriter &rewriter) const override { - if (!foldTensorCastPrecondition(op)) - return failure(); - - SmallVector newResultTypes(op->getResultTypes()); - SmallVector newOperands = - getUpdatedOperandsAfterCastOpFolding(op, newResultTypes); - Value sourceTensor = newOperands[0]; - - // Get the updated mixed-tile-sizes attribute. - SmallVector newMixedTileSizes = getNewMixedTileSizes( - rewriter, sourceTensor.getType(), op.getMixedTiles()); - - // Clone op. - // TODO: Strictly speaking, discardable attributes should be _discarded_ at - // this point. However, in practice, we use them for things that we'd like - // to preserve. Implement a better abstraction. - UnPackOp newOp = rewriter.create( - op.getLoc(), sourceTensor, newOperands[1], op.getInnerDimsPos(), - newMixedTileSizes, op.getOuterDimsPerm()); - newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary()); - - // Replace op. - Value oldResult = op.getResult(); - Value newResult = newOp.getResult(); - Value replacement = (newResult.getType() != oldResult.getType()) - ? rewriter.create( - op->getLoc(), oldResult.getType(), newResult) - : newResult; - - rewriter.replaceOp(op, {replacement}); - - return success(); - } -}; - /// Folds a tensor.cast op into a consuming DestinationStyleOpInterface op if /// the `tensor.cast` has source that is more static than the consuming op. /// @@ -4969,7 +3914,6 @@ struct FoldTensorCastProducerOp // Reject PackOp/UnpackOp (i.e. RelayoutOps) - there are dedicated patterns // for that instead. if (!foldTensorCastPrecondition(op) || - isa(*op) || isa(*op)) return failure(); @@ -5003,8 +3947,6 @@ struct FoldTensorCastProducerOp void TensorDialect::getCanonicalizationPatterns( RewritePatternSet &results) const { - results.add(getContext()); - results.add(getContext()); results.add(getContext()); } diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp index bd1a09be6b9bc..138e4be6b18e9 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @@ -87,648 +87,6 @@ struct PadOpTiling : public TilingInterface::ExternalModel { } }; -template -static SmallVector getPackUnPackIterationDomain(OpTy op, - OpBuilder &builder) { - static_assert(llvm::is_one_of::value, - "applies to only pack or unpack operations"); - OpBuilder::InsertionGuard g(builder); - int64_t rank = (std::is_same::value) ? 
op.getSourceRank() - : op.getDestRank(); - OpFoldResult zero = builder.getIndexAttr(0); - OpFoldResult one = builder.getIndexAttr(1); - ReifiedRankedShapedTypeDims resultShape; - (void)reifyResultShapes(builder, op, resultShape); - SmallVector loopBounds(rank); - for (auto dim : llvm::seq(0, rank)) { - loopBounds[dim].offset = zero; - loopBounds[dim].stride = one; - loopBounds[dim].size = resultShape[0][dim]; - } - return loopBounds; -} - -static void applyPermToRange(SmallVector &offsets, - SmallVector &sizes, - ArrayRef permutation) { - if (permutation.empty()) - return; - applyPermutationToVector(offsets, permutation); - applyPermutationToVector(sizes, permutation); -} - -struct PackOpTiling - : public TilingInterface::ExternalModel { - - SmallVector getLoopIteratorTypes(Operation *op) const { - // Note that here we only consider untiled dimensions and outer tiled data - // dimensions, the inner tiled data dimensions are materialized when - // building the body of the operation. - auto packOp = cast(op); - SmallVector iteratorTypes( - packOp.getSourceRank(), utils::IteratorType::parallel); - return iteratorTypes; - } - - SmallVector getIterationDomain(Operation *op, OpBuilder &b) const { - return getPackUnPackIterationDomain(cast(op), b); - } - - FailureOr - getTiledImplementation(Operation *op, OpBuilder &b, - ArrayRef offsets, - ArrayRef sizes) const { - auto packOp = cast(op); - Location loc = packOp.getLoc(); - - // The tiling is applied on interchanged dimensions. We have to undo the - // interchange to map sizes and offsets to the original input. - int64_t inputRank = packOp.getSourceRank(); - SmallVector origOffsets(offsets); - SmallVector origSizes(sizes); - applyPermToRange(origOffsets, origSizes, - invertPermutationVector(packOp.getOuterDimsPerm())); - - DenseMap dimAndTileMapping = - packOp.getDimAndTileMapping(); - SmallVector srcDimValues = - tensor::getMixedSizes(b, loc, packOp.getSource()); - SmallVector inputIndices, inputSizes; - for (auto dim : llvm::seq(0, inputRank)) { - using AV = affine::AffineValueExpr; - affine::AffineBuilder ab(b, loc); - AffineExpr dim0, dim1, sym; - bindDims(b.getContext(), dim0, dim1); - bindSymbols(b.getContext(), sym); - if (dimAndTileMapping.count(dim)) { - // If the data dimension is tiled, the i-th index is the product of - // offset_i and tile_i, and the i-th size is the product of sizes_i and - // tile_i. - auto avOffset = AV(dim0).bind(origOffsets[dim]); - auto avSize = AV(dim0).bind(origSizes[dim]); - auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]); - inputIndices.push_back(ab.mul(avOffset, avTileSize)); - inputSizes.push_back(ab.mul(avSize, avTileSize)); - } else { - inputIndices.push_back(origOffsets[dim]); - inputSizes.push_back(origSizes[dim]); - } - - // Limit the size of the input operand for incomplete tiles. 
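      // As a rough worked example (hypothetical sizes, not taken from this
      // patch): with an inner tile of 8, an outer tile at offset 2 of size 3
      // reads the source at index 2 * 8 = 16 with size 3 * 8 = 24. If the op
      // has a padding value and the source dim is only 20, that size is
      // clamped to min(24, 20 - 16) = 4 so only in-bounds elements are
      // sliced; the padding itself is produced by the tiled pack op.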
- if (packOp.getPaddingValue()) { - OpFoldResult dimSize = srcDimValues[dim]; - auto avDimSize = AV(dim0).bind(dimSize); - auto avInputIdx = AV(dim1).bind(inputIndices.back()); - inputSizes.back() = - ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)}); - } - } - - auto oneAttr = b.getI64IntegerAttr(1); - SmallVector strides(inputRank, oneAttr); - - SmallVector tiledOperands; - auto sourceSlice = b.create( - loc, packOp.getSource(), inputIndices, inputSizes, strides); - tiledOperands.push_back(sourceSlice); - - SmallVector outputOffsets, outputSizes; - if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets, - outputSizes))) - return {}; - - strides.append(packOp.getDestRank() - inputRank, oneAttr); - auto outSlice = b.create( - loc, packOp.getDest(), outputOffsets, outputSizes, strides); - tiledOperands.push_back(outSlice); - - if (auto val = packOp.getPaddingValue()) - tiledOperands.push_back(val); - for (auto tile : packOp.getInnerTiles()) - tiledOperands.push_back(tile); - - Operation *tiledPackOp = b.create( - loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs()); - - return TilingResult{ - {tiledPackOp}, - SmallVector(tiledPackOp->getResults()), - llvm::to_vector(ArrayRef{sourceSlice, outSlice})}; - } - - LogicalResult - getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber, - ArrayRef offsets, - ArrayRef sizes, - SmallVector &resultOffsets, - SmallVector &resultSizes) const { - // The iteration domain is over outer dimensions of packed layout. In this - // context, the outer dimensions of `resultOffsets` are `offsets`. The - // inner dimensions of `resultOffsets` are zeros because tiling is not - // applied to them. - auto packOp = cast(op); - int64_t inputRank = packOp.getSourceRank(); - int64_t outputRank = packOp.getDestRank(); - auto zeroAttr = b.getI64IntegerAttr(0); - resultOffsets.assign(offsets.begin(), offsets.end()); - resultOffsets.append(outputRank - inputRank, zeroAttr); - - ReifiedRankedShapedTypeDims outputShape; - (void)reifyResultShapes(b, packOp, outputShape); - resultSizes.assign(sizes.begin(), sizes.end()); - for (auto dataTileDim : llvm::seq(inputRank, outputRank)) - resultSizes.push_back(outputShape[0][dataTileDim]); - - return success(); - } - - FailureOr - generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber, - ArrayRef offsets, - ArrayRef sizes) const { - auto packOp = cast(op); - int64_t numTiles = packOp.getInnerDimsPos().size(); - - // tensor.pack op is fusible (as a producer) only if full inner tiles are - // iterated or inner dims are not tiled. Otherwise, it will generate a - // sequence of non-trivial ops (for partial tiles). - for (auto offset : offsets.take_back(numTiles)) - if (!isConstantIntValue(offset, 0)) - return failure(); - - for (auto iter : - llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles))) - if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter))) - return failure(); - - FailureOr tilingResult = getTiledImplementation( - op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles)); - if (failed(tilingResult)) - return failure(); - return tilingResult.value(); - } - - /// Method to return the position of iteration domain tile computed by the - /// tiled operation. In current `tensor.pack` context, the `resultOffsets` and - /// `resultSizes` only cover outer dimensions. 
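  /// As an illustrative sketch (hypothetical numbers, not from the original
  /// code): for a tiled source dimension with inner tile size 5, a source
  /// tile at offset 10 of size 15 maps to the outer (iteration-domain) tile
  /// at offset 10 floordiv 5 = 2 with size ceildiv(15, 5) = 3. This assumes
  /// the tile size is a multiple of the inner tile, which is the
  /// perfect-tiling restriction enforced below.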
- LogicalResult getIterationDomainTileFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes, - SmallVectorImpl &resultOffsets, - SmallVectorImpl &resultSizes) const { - if (operandNumber != 0) - return failure(); - - auto packOp = cast(op); - // It is not trivial to infer dest tile from source tile if `packOp` has - // padding semantic. - if (packOp.getPaddingValue()) - return failure(); - - Location loc = packOp.getLoc(); - - SmallVector outerDimOffsets, outerDimSizes; - DenseMap dimAndTileMapping = - packOp.getDimAndTileMapping(); - for (auto dim : llvm::seq(packOp.getSourceRank())) { - if (dimAndTileMapping.count(dim)) { - FailureOr cstSize = - ValueBoundsConstraintSet::computeConstantBound( - presburger::BoundType::UB, sizes[dim], - /*stopCondition=*/nullptr, /*closedUB=*/true); - std::optional cstInnerSize = - getConstantIntValue(dimAndTileMapping[dim]); - // Currently fusing `packOp` as consumer only expects perfect tiling - // scenario because even if without padding semantic, the `packOp` may - // also yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>, - // where the `tileSize` from operand of `packOp` is 5, which is not - // exactly divided by `innerTile`(=6) of `packOp`. As the result: - // 1. the first slice is extracted from (0) to (4) and inserted into - // (0,0)~(0,4) at first row. - // 2. the second slice is extracted from (5) to (9) and SHOULD BE - // respectively inserted into two rows with different length, including - // first row: (0,5) and second row (1,0)~(1,3). It is hard to coordinate - // them, thus adding below constraint to bypass them temporarily. In - // another word, we can only support tiling with consumer if the tile - // size for the producer is a multiple of the inner tile size for the - // packed dimensions at this moment. - if (failed(cstSize) || !cstInnerSize || *cstSize % *cstInnerSize != 0) { - return failure(); - } - - using AV = affine::AffineValueExpr; - affine::AffineBuilder ab(b, loc); - AffineExpr dim0, sym; - bindDims(b.getContext(), dim0); - bindSymbols(b.getContext(), sym); - auto avOffset = AV(dim0).bind(offsets[dim]); - auto avSize = AV(dim0).bind(sizes[dim]); - auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]); - outerDimOffsets.push_back(ab.floor(avOffset, avTileSize)); - outerDimSizes.push_back(ab.ceil(avSize, avTileSize)); - } else { - outerDimOffsets.push_back(offsets[dim]); - outerDimSizes.push_back(sizes[dim]); - } - } - applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm()); - resultOffsets = outerDimOffsets; - resultSizes = outerDimSizes; - return success(); - } - - /// Method to return the tiled implementation of tensor.pack as a consumer. 
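  /// A hedged sketch of the IR this is expected to produce (shapes invented
  /// for illustration only):
  ///
  /// ```mlir
  /// %src = tensor.extract_slice %source[%off] [10] [1]
  ///          : tensor<30xf32> to tensor<10xf32>
  /// %dst = tensor.extract_slice %dest[%outer_off, 0] [2, 5] [1, 1]
  ///          : tensor<6x5xf32> to tensor<2x5xf32>
  /// %tiled = tensor.pack %src inner_dims_pos = [0] inner_tiles = [5]
  ///          into %dst : tensor<10xf32> -> tensor<2x5xf32>
  /// ```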
- FailureOr getTiledImplementationFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes) const { - if (operandNumber != 0) - return failure(); - - auto packOp = cast(op); - Location loc = packOp.getLoc(); - - int64_t inputRank = packOp.getSourceRank(); - auto oneAttr = b.getI64IntegerAttr(1); - SmallVector strides(inputRank, oneAttr); - - SmallVector tiledOperands; - auto sourceSlice = b.create(loc, packOp.getSource(), - offsets, sizes, strides); - tiledOperands.push_back(sourceSlice); - - SmallVector outerDimOffsets, outerDimSizes; - if (failed(getIterationDomainTileFromOperandTile( - op, b, /*operandNumber=*/0, offsets, sizes, outerDimOffsets, - outerDimSizes))) - return failure(); - - SmallVector outputOffsets, outputSizes; - if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes, - outputOffsets, outputSizes))) - return failure(); - - strides.append(packOp.getDestRank() - inputRank, oneAttr); - auto outSlice = b.create( - loc, packOp.getDest(), outputOffsets, outputSizes, strides); - tiledOperands.push_back(outSlice); - - assert(!packOp.getPaddingValue() && "Expect no padding semantic"); - for (auto tile : packOp.getInnerTiles()) - tiledOperands.push_back(tile); - - Operation *tiledPackOp = b.create( - loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs()); - - return TilingResult{ - {tiledPackOp}, - SmallVector(tiledPackOp->getResults()), - llvm::to_vector(ArrayRef{sourceSlice, outSlice})}; - } -}; - -struct UnpackTileDimInfo { - bool isAlignedToInnerTileSize; - OpFoldResult sourceOffset; - OpFoldResult sourceSize; - OpFoldResult resultOffset; - OpFoldResult destExpandedSize; -}; - -/// Returns the needed information for tiling unpack op on `tileDim` with given -/// `tileOffset` and `tileSize`. For more details, see the comment of the -/// `getTiledImplementation`. -static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp, - int64_t tileDim, - OpFoldResult tileOffset, - OpFoldResult tileSize) { - UnpackTileDimInfo info; - Attribute zeroAttr = b.getIndexAttr(0); - Attribute oneAttr = b.getIndexAttr(1); - DenseMap dimAndTileMapping = - unpackOp.getDimAndTileMapping(); - // The dimension is not one of packed data dimension. - if (!dimAndTileMapping.count(tileDim)) { - info.isAlignedToInnerTileSize = true; - info.sourceOffset = tileOffset; - info.sourceSize = tileSize; - info.resultOffset = zeroAttr; - info.destExpandedSize = tileSize; - return info; - } - - Location loc = unpackOp.getLoc(); - using AV = affine::AffineValueExpr; - affine::AffineBuilder ab(b, loc); - AffineExpr dim0, dim1, sym0; - bindDims(b.getContext(), dim0, dim1); - bindSymbols(b.getContext(), sym0); - - OpFoldResult innerTileSize = dimAndTileMapping[tileDim]; - - info.isAlignedToInnerTileSize = false; - FailureOr cstSize = ValueBoundsConstraintSet::computeConstantBound( - presburger::BoundType::UB, tileSize, - /*stopCondition=*/nullptr, /*closedUB=*/true); - std::optional cstInnerSize = getConstantIntValue(innerTileSize); - if (!failed(cstSize) && cstInnerSize) { - if (*cstSize % *cstInnerSize == 0) - info.isAlignedToInnerTileSize = true; - - // If the tiling size equals to the inner tiling size, the outer dims are - // always 1. 
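    // For example (hypothetical values, not from the patch): with an inner
    // tile of 8, a tile at offset 16 of size 8 covers exactly one source row,
    // so sourceOffset = 16 floordiv 8 = 2, sourceSize = 1, resultOffset = 0.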
- if (*cstInnerSize == *cstSize) { - auto lhs = AV(dim0).bind(tileOffset); - auto rhs = AV(dim1).bind(innerTileSize); - info.sourceOffset = ab.floor(lhs, rhs); - info.sourceSize = oneAttr; - info.resultOffset = zeroAttr; - info.destExpandedSize = tileSize; - return info; - } - } - - if (info.isAlignedToInnerTileSize) { - info.sourceOffset = - ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize)); - info.resultOffset = zeroAttr; - info.destExpandedSize = tileSize; - - // The ceilDiv is needed here because there could be incomplete tile even - // it is perfect tiling cases. E.g., - // %0 = unpack tensor<33x2xf32> into tensor<64xf32> - // If the tiling size is 32, there will be 3 tiles. Two of them have - // size=32; one of them have size=2. The size is represented using - // affine_min op; we need ceilDiv. - info.sourceSize = - ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize)); - return info; - } - - affine::DivModValue firstCoord = affine::getDivMod( - b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset), - getValueOrCreateConstantIndexOp(b, loc, innerTileSize)); - OpFoldResult tileExclusiveBound = - ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize)); - affine::DivModValue lastCoord = affine::getDivMod( - b, loc, - getValueOrCreateConstantIndexOp( - b, loc, - ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))), - getValueOrCreateConstantIndexOp(b, loc, innerTileSize)); - - OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient), - AV(dim1).bind(firstCoord.quotient)); - info.sourceSize = - ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr)); - info.sourceOffset = firstCoord.quotient; - info.resultOffset = firstCoord.remainder; - // Do not create an Affine ops for expanded size because the affine op is too - // complicated which would trigger an issue in affine ops simplification. - info.destExpandedSize = b.createOrFold( - loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize), - getValueOrCreateConstantIndexOp(b, loc, innerTileSize)); - return info; -} - -struct UnPackOpTiling - : public TilingInterface::ExternalModel { - - SmallVector getLoopIteratorTypes(Operation *op) const { - auto unpackOp = cast(op); - SmallVector iteratorTypes( - unpackOp.getDestRank(), utils::IteratorType::parallel); - return iteratorTypes; - } - - SmallVector getIterationDomain(Operation *op, OpBuilder &b) const { - return getPackUnPackIterationDomain(cast(op), b); - } - - /// There are two cases in tiling unpack ops. If the tiling size is aligned to - /// the inner tile size, the corresponding tiles of source are all complete. - /// Otherwise, there are in-complete tiles. We will need to expand the slice - /// of source for getting complete tiles. The tiled unpack op unpacks more - /// data from source, so We'll need an extract_slice op to shift and truncate - /// the output. - /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The - /// coordinates of second tile (i.e., result[15..31]) are - /// [(1, 7), (2, 0,), (2, 1) ... (3, 6), (3, 7)]. The first row and the last - /// row are incomplete tiles. To represent the unpack op, we have to complete - /// the rows. I.e., the input coordinates would start with (1, 0); end with - /// (3, 7). In this context, the tiled unpack produces a (3 * n) elements - /// because there are 3 rows in total. Follow by a tensor.extract_slice op, we - /// can get the actual result. 
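  /// A hedged IR sketch of one such tile from the example above (the tile at
  /// offset 15 with size 15, unpacking tensor<4x8xf32> into tensor<32xf32>):
  ///
  /// ```mlir
  /// %src = tensor.extract_slice %in[1, 0] [3, 8] [1, 1]
  ///          : tensor<4x8xf32> to tensor<3x8xf32>
  /// %empty = tensor.empty() : tensor<24xf32>
  /// %full = tensor.unpack %src inner_dims_pos = [0] inner_tiles = [8]
  ///          into %empty : tensor<3x8xf32> -> tensor<24xf32>
  /// %res = tensor.extract_slice %full[7] [15] [1]
  ///          : tensor<24xf32> to tensor<15xf32>
  /// ```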
- FailureOr - getTiledImplementation(Operation *op, OpBuilder &b, - ArrayRef offsets, - ArrayRef sizes) const { - auto unpackOp = cast(op); - int64_t srcRank = unpackOp.getSourceRank(); - int64_t destRank = unpackOp.getDestRank(); - int64_t numInnerTiles = srcRank - destRank; - Location loc = unpackOp.getLoc(); - - // The perfect tiling case indicates that the tiling sizes are multiple of - // inner_tile_size. In this context, no extra data is needed when - // representing the tiled unpack op. - bool isPerfectTilingCase = true; - Attribute oneAttr = b.getIndexAttr(1); - SmallVector sliceSrcStrides(destRank, oneAttr); - SmallVector sliceSrcIndices, sliceSrcSizes; - SmallVector destExpandedSizes, resultOffsetsFromDest; - for (auto dim : llvm::seq(0, destRank)) { - UnpackTileDimInfo info = - getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]); - if (!info.isAlignedToInnerTileSize) - isPerfectTilingCase = false; - sliceSrcIndices.push_back(info.sourceOffset); - sliceSrcSizes.push_back(info.sourceSize); - destExpandedSizes.push_back(info.destExpandedSize); - resultOffsetsFromDest.push_back(info.resultOffset); - } - - // The tiling is applied on destination dimensions. We have to apply the - // interchange on source dimensions if outer_dims_perm is set. - applyPermToRange(sliceSrcIndices, sliceSrcSizes, - unpackOp.getOuterDimsPerm()); - Attribute zeroAttr = b.getIndexAttr(0); - sliceSrcIndices.append(numInnerTiles, zeroAttr); - sliceSrcSizes.append(unpackOp.getMixedTiles()); - sliceSrcStrides.append(numInnerTiles, oneAttr); - SmallVector generatedSlices; - ExtractSliceOp sliceSource = - b.create(loc, unpackOp.getSource(), sliceSrcIndices, - sliceSrcSizes, sliceSrcStrides); - generatedSlices.push_back(sliceSource); - - SmallVector destStrides(destRank, oneAttr); - Value sliceDest; - if (isPerfectTilingCase) { - auto destSliceOp = b.create(loc, unpackOp.getDest(), - offsets, sizes, destStrides); - sliceDest = destSliceOp; - generatedSlices.push_back(destSliceOp); - } else { - sliceDest = b.create(loc, destExpandedSizes, - unpackOp.getDestType().getElementType()); - } - - SmallVector tiledOperands = {sliceSource.getResult(), sliceDest}; - for (auto tile : unpackOp.getInnerTiles()) - tiledOperands.push_back(tile); - - Operation *tiledUnpackOp = b.create( - loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs()); - - if (isPerfectTilingCase) - return TilingResult{{tiledUnpackOp}, - SmallVector(tiledUnpackOp->getResults()), - generatedSlices}; - - auto extractSlice = - b.create(loc, tiledUnpackOp->getResult(0), - resultOffsetsFromDest, sizes, destStrides); - return TilingResult{ - {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices}; - } - - LogicalResult - getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber, - ArrayRef offsets, - ArrayRef sizes, - SmallVector &resultOffsets, - SmallVector &resultSizes) const { - resultOffsets = llvm::to_vector(offsets); - resultSizes = llvm::to_vector(sizes); - return success(); - } - - FailureOr - generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber, - ArrayRef offsets, - ArrayRef sizes) const { - FailureOr tilingResult = - getTiledImplementation(op, b, offsets, sizes); - if (failed(tilingResult)) - return failure(); - return tilingResult.value(); - } - - /// Method to return the position of iteration domain tile computed by the - /// tiled operation. 
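  /// As a rough numeric sketch (made-up values, not from the original code):
  /// with an inner tile of 8 and an unpacked result dim of size 30, a source
  /// tile covering outer offset 2 with outer size 2 maps to the
  /// iteration-domain tile at offset 2 * 8 = 16 with size
  /// min(2 * 8, 30 - 16) = 14, i.e. the size is clamped to the result.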
- LogicalResult getIterationDomainTileFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes, - SmallVectorImpl &resultOffsets, - SmallVectorImpl &resultSizes) const { - auto unPackOp = cast(op); - // If the operand tile is the dest, then no adjustment is needed. - if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) { - resultOffsets = llvm::to_vector(offsets); - resultSizes = llvm::to_vector(sizes); - return success(); - } - Location loc = unPackOp.getLoc(); - - int64_t numTiles = unPackOp.getInnerDimsPos().size(); - auto destOffsets = offsets.drop_back(numTiles); - auto destSizes = sizes.drop_back(numTiles); - // The tiling is applied on interchanged dimensions. We have to undo the - // interchange to map sizes and offsets to the original input. - int64_t outputRank = unPackOp.getDestRank(); - ReifiedRankedShapedTypeDims reifiedReturnShapes; - if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes))) - return failure(); - SmallVector outputMixedSizes = reifiedReturnShapes.front(); - SmallVector origOffsets(destOffsets); - SmallVector origSizes(destSizes); - applyPermToRange(origOffsets, origSizes, - invertPermutationVector(unPackOp.getOuterDimsPerm())); - - DenseMap dimAndTileMapping = - unPackOp.getDimAndTileMapping(); - - for (auto dim : llvm::seq(0, outputRank)) { - using AV = affine::AffineValueExpr; - affine::AffineBuilder ab(b, loc); - AffineExpr dim0, dim1, sym0; - bindDims(b.getContext(), dim0, dim1); - bindSymbols(b.getContext(), sym0); - if (dimAndTileMapping.count(dim)) { - // If the data dimension is tiled, the i-th index is the product of - // offset_i and tile_i, and the i-th size is the product of sizes_i and - // tile_i. The sizes must be clamped to the sizes of the unpack result. - auto avOffset = AV(dim0).bind(origOffsets[dim]); - auto avSize = AV(dim0).bind(origSizes[dim]); - auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]); - auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]); - resultOffsets.push_back(ab.mul(avOffset, avTileSize)); - auto avResultOffset = AV(dim1).bind(resultOffsets.back()); - resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize), - ab.sub(avResultSize, avResultOffset)})); - } else { - resultOffsets.push_back(origOffsets[dim]); - resultSizes.push_back(origSizes[dim]); - } - } - return success(); - } - - /// Method to return the tiled implementation of tensor.unpack as a consumer. - FailureOr getTiledImplementationFromOperandTile( - Operation *op, OpBuilder &b, unsigned operandNumber, - ArrayRef offsets, ArrayRef sizes) const { - auto unPackOp = cast(op); - // tensor.unpack op is fusible (as a consumer) only if inner dims are not - // tiled. - int64_t numTiles = unPackOp.getInnerDimsPos().size(); - for (auto iter : - llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) { - if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter))) - return failure(); - } - - Location loc = unPackOp.getLoc(); - - // Fetch offset/size for creating the slice of the dest operand of - // unpack op. - SmallVector outputOffsets, outputSizes; - if (failed(getIterationDomainTileFromOperandTile( - op, b, /*operandNumber=*/0, offsets, sizes, outputOffsets, - outputSizes))) - return failure(); - - auto oneAttr = b.getI64IntegerAttr(1); - int64_t outputRank = unPackOp.getDestRank(); - SmallVector strides(outputRank, oneAttr); - - SmallVector tiledOperands; - // Create slice of the dest operand. 
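    // (Illustrative note, not part of the original code: the fused consumer
    // ends up as extract_slice(dest) + extract_slice(source) + a smaller
    // tensor.unpack whose result type matches the dest slice, e.g.
    // tensor<2x8xf32> -> tensor<16xf32> for an inner tile of 8.)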
- auto extractDestSlice = b.create( - loc, unPackOp.getDest(), outputOffsets, outputSizes, strides); - tiledOperands.push_back(extractDestSlice); - - SmallVector inputOffsets, inputSizes; - strides.append(unPackOp.getSourceRank() - outputRank, oneAttr); - // Create slice of the source operand. - auto extractSourceSlice = b.create( - loc, unPackOp.getSource(), offsets, sizes, strides); - tiledOperands.insert(tiledOperands.begin(), extractSourceSlice); - for (auto tile : unPackOp.getInnerTiles()) - tiledOperands.push_back(tile); - - // Create tiled unpack op. - Operation *tiledUnPackOp = - b.create(loc, TypeRange{extractDestSlice.getType()}, - tiledOperands, op->getAttrs()); - - return TilingResult{{tiledUnPackOp}, - SmallVector(tiledUnPackOp->getResults()), - llvm::to_vector(ArrayRef{ - extractSourceSlice, extractDestSlice})}; - } -}; - } // namespace FailureOr tensor::bubbleUpPadSlice(OpBuilder &b, @@ -949,15 +307,5 @@ void mlir::tensor::registerTilingInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) { tensor::PadOp::attachInterface(*ctx); - tensor::PackOp::attachInterface(*ctx); - tensor::UnPackOp::attachInterface(*ctx); - }); -} - -void mlir::tensor::registerTilingInterfaceExternalModelsForPackUnPackOps( - DialectRegistry ®istry) { - registry.addExtension(+[](MLIRContext *ctx, TensorDialect *dialect) { - tensor::PackOp::attachInterface(*ctx); - tensor::UnPackOp::attachInterface(*ctx); }); } diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp index 5c16e538ac242..52462aae4bc80 100644 --- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -92,61 +92,6 @@ mlir::tensor::computeTransposedType(RankedTensorType rankedTensorType, return transposedTensorType; } -/// The permutation can be obtained from two permutations: -/// a) Compute the permutation vector to move the last `numPackedDims` into -/// the `innerPosDims` of a shape of rank `rank`. -/// b) Compute the permutation vector to move outer dims if the -/// `outerPerm` parameter is not empty. -/// Apply (b) permutation on (a) permutation to get the final permutation. 
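/// As a hedged example (not part of the original comment): for a packed
/// rank-3 type with `innerDimsPos = [0]` and an empty `outerPerm`, step (a)
/// moves the single trailing tile dim (index 2) next to its outer dim,
/// yielding [0, 2, 1]; step (b) is the identity, so the final permutation
/// is [0, 2, 1].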
-static SmallVector<int64_t>
-computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
-                      ArrayRef<int64_t> &outerPerm,
-                      PackingMetadata &packingMetadata) {
-  int64_t numPackedDims = innerDimsPos.size();
-  auto lastDims =
-      llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
-  packingMetadata = computePackingMetadata(rank, innerDimsPos);
-  SmallVector<int64_t> innerPositionsPerm =
-      computePermutationVector(rank, lastDims, packingMetadata.insertPositions);
-
-  SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
-  if (!outerPerm.empty())
-    applyPermutationToVector(outerPos, outerPerm);
-  SmallVector<int64_t> outerPositionPerm =
-      computePermutationVector(rank, packingMetadata.outerPositions, outerPos);
-
-  SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
-  applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
-  return packInverseDestPermutation;
-}
-
-SmallVector<int64_t> mlir::tensor::getPackInverseDestPerm(PackOp packOp) {
-
-  PackingMetadata pMetadata;
-  int64_t packedRank = packOp.getDestType().getRank();
-  ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
-  ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
-  SmallVector<int64_t> packInvDestPerm =
-      computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata);
-  return packInvDestPerm;
-}
-
-SmallVector<int64_t> mlir::tensor::getUnPackInverseSrcPerm(UnPackOp unpackOp) {
-  PackingMetadata metadata;
-  return mlir::tensor::getUnPackInverseSrcPerm(unpackOp, metadata);
-}
-
-SmallVector<int64_t>
-mlir::tensor::getUnPackInverseSrcPerm(UnPackOp unpackOp,
-                                      PackingMetadata &metadata) {
-  int64_t unpackRank = unpackOp.getSourceType().getRank();
-  ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
-  ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
-  SmallVector<int64_t> unpackInvSrcPerm =
-      computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata);
-  return unpackInvSrcPerm;
-}
-
 bool mlir::tensor::isCastLikeInsertSliceOp(InsertSliceOp op) {
   llvm::SmallBitVector droppedDims = op.getDroppedDims();
   int64_t srcDim = 0;