diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td index 98a5fd278a997..f5cf53117d773 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -178,6 +178,16 @@ def LinalgConvolutionOpInterface : OpInterface<"ConvolutionOpInterface"> { ]; } +def LinalgRelayoutOpInterface : OpInterface<"RelayoutOpInterface"> { + let description = [{ + A Linalg relayout-op is either linalg.pack or linalg.unpack. + + While we could extend this interface with methods from Linalg_RelayoutOp, + this is currently not needed and left as a TODO. + }]; + let cppNamespace = "::mlir::linalg"; +} + def LinalgFillOpInterface : OpInterface<"FillOpInterface"> { let description = [{ A fill operation is defined in general terms: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td index 6fbc7c0c25e6f..a08a778fc25e1 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td @@ -22,6 +22,7 @@ include "mlir/Dialect/Linalg/IR/LinalgBase.td" include "mlir/Interfaces/DestinationStyleOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" +include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td" include "mlir/IR/OpAsmInterface.td" //===----------------------------------------------------------------------===// @@ -31,7 +32,7 @@ include "mlir/IR/OpAsmInterface.td" class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> : Op<Linalg_Dialect, mnemonic, !listconcat(traits, [ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>, - DestinationStyleOpInterface, + DestinationStyleOpInterface, LinalgRelayoutOpInterface, ConditionallySpeculatable, NoMemoryEffect, DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>, TypesMatchWith<"result type matches type of dest", diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index e86d175489775..12080cee85c9d 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -45,7 +45,7 @@ def ApplyDecomposeTensorPackUnpackPatternsOp : Op<Transform_Dialect, "apply_patterns.linalg.decompose_pack_unpack", [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { let description = [{ - Collect patterns to decompose tensor.pack and tensor.unpack into e.g. + Collect patterns to decompose linalg.pack and linalg.unpack into e.g. tensor::PadOp and linalg::TransposeOp ops. Requires all outer dims to be unit. }]; @@ -126,6 +126,28 @@ def ApplyPadVectorizationPatternsOp : Op<Transform_Dialect, +def ApplyFoldIntoPackAndUnpackPatternsOp : Op<Transform_Dialect, + "apply_patterns.linalg.fold_into_pack_and_unpack", + [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { + let description = [{ + Indicates that operations like tensor.pad and tensor.extract_slice should + be folded into linalg.pack and linalg.unpack operations, respectively. + }]; + + let assemblyFormat = "attr-dict"; +} + +def ApplyFoldPackUnpackIntoEmptyPatternsOp : Op<Transform_Dialect, + "apply_patterns.linalg.fold_pack_unpack_into_empty", + [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { + let description = [{ + Indicates that linalg.pack and linalg.unpack operations should be folded + away when their source is a tensor.empty, i.e. when the data being + re-laid-out is undefined. + }]; + + let arguments = (ins DefaultValuedAttr<BoolAttr, "false">:$fold_single_use_only); + let assemblyFormat = "attr-dict"; +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// @@ -547,19 +569,18 @@ def LowerPackOp : Op<Transform_Dialect, "structured.lower_pack", [ { let description = [{ - Rewrite a tensor.pack into tensor.pad + tensor.expand_shape + linalg.transpose. + Rewrite a linalg.pack into tensor.pad + tensor.expand_shape + linalg.transpose. #### Return modes - This operation ignores non-pack ops and drops them in the return.
- This operation produces a silenceable failure if the rewrite fails for any - reason. - If all the operations referred to by the `target` are rewritten, the - transform succeeds. - Return handles to the newly produced pad, expand_shape and transpose ops. + This operation ignores non-pack ops and drops them in the return. This + operation produces a silenceable failure if the rewrite fails for any + reason. If all the operations referred to by the `target` are rewritten, + the transform succeeds. Return handles to the newly produced pad, + expand_shape and transpose ops. }]; - let arguments = (ins Transform_ConcreteOpType<"tensor.pack">:$target, + let arguments = (ins Transform_ConcreteOpType<"linalg.pack">:$target, DefaultValuedAttr<BoolAttr, "true">:$lowerPadLikeWithInsertSlice); let results = (outs Transform_ConcreteOpType<"tensor.pad">:$pad_op, Transform_ConcreteOpType<"tensor.expand_shape">:$expand_shape_op, @@ -571,7 +592,7 @@ def LowerPackOp : Op { let description = [{ - Lower a tensor.unpack into empty + linalg.transpose + tensor.collapse_shape + + Lower a linalg.unpack into empty + linalg.transpose + tensor.collapse_shape + tensor.extract_slice. #### Return modes - This operation ignores non-unpack ops and drops them in the return. - This operation produces a silenceable failure if the rewrite fails for any - reason. - If all the operations referred to by the `target` are rewritten, the - transform succeeds. - Return handles to the newly produced empty, transpose, collapse_shape and extract_slice ops. + This operation ignores non-unpack ops and drops them in the return. This + operation produces a silenceable failure if the rewrite fails for any + reason. If all the operations referred to by the `target` are rewritten, + the transform succeeds. Return handles to the newly produced empty, + transpose, collapse_shape and extract_slice ops. }]; - let arguments = (ins Transform_ConcreteOpType<"tensor.unpack">:$target, + let arguments = (ins Transform_ConcreteOpType<"linalg.unpack">:$target, DefaultValuedAttr<BoolAttr, "true">:$lowerUnpadLikeWithExtractSlice); let results = (outs Transform_ConcreteOpType<"tensor.empty">:$empty_op, Transform_ConcreteOpType<"linalg.transpose">:$transpose_op, @@ -613,7 +633,7 @@ def LowerUnPackOp : Op, ReportTrackingListenerFailuresOpTrait]> { let description = [{ - Apply a transposition to a single `tensor.pack` (resp. `tensor.unpack`) and + Apply a transposition to a single `linalg.pack` (resp. `linalg.unpack`) and update the `linalg.generic` op that consumes (resp. produces) the operation. This transform allows composing a simple `structured.pack` with additional @@ -989,19 +1009,19 @@ def PackTransposeOp : Op diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ struct LowerPackResult { FailureOr<LowerPackResult> lowerPack(RewriterBase &rewriter, - tensor::PackOp packOp, + linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice = true); struct LowerUnPackOpResult { @@ -1133,14 +1133,14 @@ struct LowerUnPackOpResult { /// Rewrite unpack as empty + transpose + reshape + extract_slice. FailureOr<LowerUnPackOpResult> -lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp, +lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice = true); /// Struct to hold the result of a `pack` call. struct PackResult { - SmallVector<tensor::PackOp> packOps; + SmallVector<linalg::PackOp> packOps; linalg::LinalgOp packedLinalgOp; - SmallVector<tensor::UnPackOp> unPackOps; + SmallVector<linalg::UnPackOp> unPackOps; }; /// Implement packing of a single LinalgOp by `packedSizes`. /// There must be one packedSizes entry per `linalgOp` iterator.
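Since this patch only renames the ops, it may help to picture the rewrite that `lowerPack` performs. Below is a minimal sketch (shapes and SSA names are illustrative, not taken from this patch) for the case where the tile sizes divide the source shape evenly, so no tensor.pad is required:

```mlir
// Pack a 128x256 tensor into 4x8 inner tiles.
%pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 8]
    into %dest : tensor<128x256xf32> -> tensor<32x32x4x8xf32>

// After lowering: reshape to the tiled shape, then move the tile
// dimensions innermost via a transpose.
%expanded = tensor.expand_shape %src [[0, 1], [2, 3]] output_shape [32, 4, 32, 8]
    : tensor<128x256xf32> into tensor<32x4x32x8xf32>
%transposed = linalg.transpose ins(%expanded : tensor<32x4x32x8xf32>)
    outs(%dest : tensor<32x32x4x8xf32>) permutation = [0, 2, 1, 3]
```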
@@ -1150,9 +1150,9 @@ FailureOr<PackResult> pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, /// Struct to hold the result of a `packTranspose` call. struct PackTransposeResult { - tensor::PackOp transposedPackOp; + linalg::PackOp transposedPackOp; linalg::LinalgOp transposedLinalgOp; - tensor::UnPackOp transposedUnPackOp; + linalg::UnPackOp transposedUnPackOp; }; /// Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the /// transposed PackOp -> LinalgOp -> UnPackOp chain after replacements. @@ -1163,8 +1163,8 @@ struct PackTransposeResult { /// 3. `outerPerm` (resp. `innerPerm`) must be valid permutations of /// `packOp.getOuterDimsPerm` (resp. `packOp.getInnerDimsPerm`) or empty. FailureOr<PackTransposeResult> -packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, - linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp, +packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, + linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef<int64_t> outerPerm, ArrayRef<int64_t> innerPerm); /// Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m @@ -1525,15 +1525,15 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> { const SmallVector<Value> &dynSizes) const; }; -/// Rewrites a tensor::PackOp into a sequence of: +/// Rewrites a linalg::PackOp into a sequence of: /// * tensor::PadOp + linalg::TransposeOp + tensor::EmptyOp + /// tensor::InsertSliceOp ops. /// -/// Requires that all the outer dims of the input tensor::PackOp are 1. +/// Requires that all the outer dims of the input linalg::PackOp are 1. /// /// Before: /// ``` -/// %packed = tensor.pack %input +/// %packed = linalg.pack %input /// padding_value(%pad : f32) /// inner_dims_pos = [1, 0] /// inner_tiles = [2, %high] @@ -1559,20 +1559,20 @@ struct DecomposePadOpPattern : public OpRewritePattern<tensor::PadOp> { /// : tensor<2x?xf32> into tensor<1x1x2x?xf32> /// ``` struct DecomposeOuterUnitDimsPackOpPattern - : public OpRewritePattern<tensor::PackOp> { - using OpRewritePattern<tensor::PackOp>::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::PackOp packOp, + : public OpRewritePattern<linalg::PackOp> { + using OpRewritePattern<linalg::PackOp>::OpRewritePattern; + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override; }; -/// Rewrites a tensor::UnPackOp into a sequence of rank-reduced +/// Rewrites a linalg::UnPackOp into a sequence of rank-reduced /// * tensor::ExtractSliceOp + linalg::TransposeOp + tensor::InsertSliceOp /// -/// Requires that all the outer dims of the input tensor::PackOp are 1. +/// Requires that all the outer dims of the input linalg::UnPackOp are 1. /// /// Before: /// ``` -/// %packed = tensor.unpack %input +/// %packed = linalg.unpack %input /// inner_dims_pos = [1, 0] /// inner_tiles = [2, 8] /// into %output : tensor<1x1x2x8xf32> -> tensor<5x1xf32> @@ -1593,9 +1593,9 @@ struct DecomposeOuterUnitDimsPackOpPattern /// : tensor<8x2xf32> to tensor<5x1xf32> /// ``` struct DecomposeOuterUnitDimsUnPackOpPattern - : public OpRewritePattern<tensor::UnPackOp> { - using OpRewritePattern<tensor::UnPackOp>::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, + : public OpRewritePattern<linalg::UnPackOp> { + using OpRewritePattern<linalg::UnPackOp>::OpRewritePattern; + LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override; }; @@ -1717,7 +1717,7 @@ void populateLinalgGenericOpsSpecializationPatterns( void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); -/// Populates patterns to decompose tensor.pack and tensor.unpack Ops into e.g.
+/// Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g. /// tensor.pad, linalg.transpose, tensor.{insert|extract}_slice. Requires all /// outer dims to be unit. void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns); @@ -1779,7 +1779,7 @@ void populateElementwiseOpsFusionPatterns( RewritePatternSet &patterns, const ControlFusionFn &controlElementwiseOpFusion); -/// Function type which is used to control propagation of tensor.pack/unpack +/// Function type which is used to control propagation of linalg.pack/unpack /// ops. using ControlPropagationFn = std::function<bool(OpOperand *opOperand)>; @@ -1888,6 +1888,19 @@ void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns); /// convert to a `linalg.dot`. void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns); +/// Populates `patterns` with patterns that fold operations like `tensor.pad` +/// and `tensor.extract_slice` into `linalg.pack` and `linalg.unpack` +/// operations, respectively. +void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns); + +/// Populates `patterns` with patterns that fold `linalg.pack` and +/// `linalg.unpack` into `tensor.empty`. +void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns); + +/// Populates `patterns` with patterns that simplify `linalg.pack` and +/// `linalg.unpack` operations. +void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns); + } // namespace linalg } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 1e4f3004dec7e..80aa034d2199d 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -33,6 +33,24 @@ namespace linalg { //===----------------------------------------------------------------------===// // Utilities for inferring various semantics properties of Linalg ops. //===----------------------------------------------------------------------===// +/// Shell function to compute the destination permutation of a PackOp. +/// It uses the helper function `computePackUnPackPerm` to get the +/// permutation vector. The only major difference between unpack and pack is +/// that pack uses the destination rank, whereas unpack uses the source rank. +SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp); + +/// Shell function to compute the source permutation of an UnPackOp. +/// Like `getPackInverseDestPerm`, it uses the helper function +/// `computePackUnPackPerm` to get the permutation vector. The only major +/// difference between unpack and pack is that pack uses the destination rank, +/// whereas unpack uses the source rank. +SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp unpackOp); + +/// Shell function to compute the source-rank permutation of an UnPackOp. +/// Unpack requires some packing metadata, so this overload additionally +/// returns that metadata by reference.
+SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp, + PackingMetadata &metadata); //===----------------------------------------------------------------------===// // General utilities diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td index 81bab1b0c82f7..fcb10f55d556d 100644 --- a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td +++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td @@ -53,16 +53,6 @@ def ApplyFoldTensorEmptyPatternsOp : Op:$fold_single_use_only); let assemblyFormat = "attr-dict"; } -def ApplyFoldIntoPackAndUnpackPatternsOp : Op<Transform_Dialect, - "apply_patterns.tensor.fold_into_pack_and_unpack", - [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> { - let description = [{ - Indicates that operations like tensor.pad and tensor.extract_slice should - be folded into tensor.pack and tensor.unpack operations, respectively. - }]; - - let assemblyFormat = "attr-dict"; -} def ApplyFoldTensorSubsetOpsPatternsOp : Op; /// Populates `patterns` with patterns that replace tensor ops (such as diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 1aabb693de57c..075887ecc0afe 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -22,6 +22,7 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" @@ -847,7 +848,7 @@ struct FoldFillWithTensorExtract : public OpRewritePattern<tensor::ExtractOp> { /// 1. The pack op does not have a padding value, or /// 2. The filled value and padding value are the same. static FailureOr<FillOp> foldFillPackIntoFillOp(RewriterBase &rewriter, - tensor::PackOp packOp) { + linalg::PackOp packOp) { auto fillOp = packOp.getSource().getDefiningOp<FillOp>(); if (!fillOp) return failure(); @@ -865,12 +866,12 @@ static FailureOr<FillOp> foldFillPackIntoFillOp(RewriterBase &rewriter, } /// Wrapper pattern that applies the foldFillPackIntoFillOp method. -struct FoldFillWithPack : public OpRewritePattern<tensor::PackOp> { +struct FoldFillWithPack : public OpRewritePattern<linalg::PackOp> { public: FoldFillWithPack(MLIRContext *context) - : OpRewritePattern<tensor::PackOp>(context) {} + : OpRewritePattern<linalg::PackOp>(context) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { auto fillOp = foldFillPackIntoFillOp(rewriter, packOp); if (failed(fillOp)) @@ -3414,20 +3415,9 @@ FailureOr<TilingResult> WinogradOutputTransformOp::getTiledImplementation( //===----------------------------------------------------------------------===// // LinalgDialect +// TODO: Merge with the LinalgDialect block at the bottom //===----------------------------------------------------------------------===// -void LinalgDialect::getCanonicalizationPatterns( RewritePatternSet &results) const { - results.add<EraseDeadLinalgOp, FoldTensorCastConsumerOp, - InferStaticShapeOfOperands>(getContext()); -} - -Operation *LinalgDialect::materializeConstant(OpBuilder &builder, Attribute value, Type type, Location loc) { - return arith::ConstantOp::materialize(builder, value, type, loc); -} - // Returns true if the result expressions of `subMap` are a subset of `fullMap`.
static bool areResultExprsSubsetOf(AffineMap subMap, AffineMap fullMap) { auto explicitRange = subMap.getResults(); @@ -4064,6 +4054,78 @@ Speculation::Speculatability BatchMatmulOp::getSpeculatability() { //===----------------------------------------------------------------------===// // PackOp/UnPackOp Common //===----------------------------------------------------------------------===// +// FIXME: Duplicates similar hook from TensorOps.cpp! +bool foldTensorCastPrecondition(DestinationStyleOpInterface op) { + // Succeed only if at least one operand comes from a tensor::CastOp that + // can be folded into the consuming op. + bool hasTensorCastOperand = + llvm::any_of(op->getOpOperands(), [&](OpOperand &opOperand) { + if (llvm::isa<BlockArgument>(opOperand.get())) + return false; + auto castOp = opOperand.get().getDefiningOp<tensor::CastOp>(); + return castOp && canFoldIntoConsumerOp(castOp); + }); + + return hasTensorCastOperand; +} + +// FIXME: Duplicates similar hook from TensorOps.cpp! +static SmallVector<Value> getNewOperands(DestinationStyleOpInterface op, + SmallVector<Type> &newResTy) { + SmallVector<Value> newOperands; + newOperands.reserve(op->getNumOperands()); + + // Assumes that the result has dpsInits followed by nonDpsInits. + int64_t dpsInitIdx = 0; + for (OpOperand &opOperand : op->getOpOperands()) { + auto tensorCastOp = opOperand.get().getDefiningOp<tensor::CastOp>(); + bool fold = canFoldIntoConsumerOp(tensorCastOp); + newOperands.push_back(fold ? tensorCastOp.getOperand() : opOperand.get()); + if (op.isDpsInit(&opOperand) && + !llvm::isa<MemRefType>(newOperands.back().getType())) + newResTy[dpsInitIdx++] = newOperands.back().getType(); + } + return newOperands; } // Given the (potentially) updated packed type, `newPackedTy`, generates an // updated mixed-tile-sizes attribute. A tile size is updated only // when: // * a dim from newPackedTy is static, and // * the corresponding size from mixedTiles is still dynamic. // Otherwise, the original tile size is preserved. // Note - packed-type-dim and mixed-tile-size should always match! // // FIXME: Duplicates similar hook from TensorOps.cpp! +static SmallVector<OpFoldResult> +getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy, + SmallVector<OpFoldResult> mixedTiles) { + SmallVector<OpFoldResult> newMixedTileSizes; + for (auto it : llvm::zip(cast<ShapedType>(newPackedTy) .getShape() .take_back(mixedTiles.size()), + mixedTiles)) { + int64_t shape = std::get<0>(it); + if (shape == ShapedType::kDynamic) { + newMixedTileSizes.push_back(std::get<1>(it)); + continue; + } + + // The dim is static: keep the tile size if it is already a constant + // attribute, otherwise materialize the matching static size. + OpFoldResult tile = std::get<1>(it); + if (Attribute attr = llvm::dyn_cast_if_present<Attribute>(tile)) { + // Already a constant + newMixedTileSizes.push_back(tile); + } else { + assert(getConstantIntValue(tile).value() == shape && + "tile size and dim size don't match!"); + newMixedTileSizes.push_back( + (rewriter.getIntegerAttr(rewriter.getIndexType(), shape))); + } + } + + return newMixedTileSizes; } template <typename OpTy> static LogicalResult @@ -4757,6 +4819,58 @@ OpFoldResult PackOp::fold(FoldAdaptor adaptor) { return {}; } +/// Folds a tensor.cast op into a consuming PackOp if the +/// `tensor.cast` has a source that is more static than the consuming op. +/// +/// Example: +/// ```mlir +/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32> +/// %2 = linalg.pack %1 ... : tensor<?x?xf32> ... +/// ``` +/// +/// folds into: +/// +/// ```mlir +/// %2 = linalg.pack %0 ... : tensor<8x16xf32> ...
+/// ``` +struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> { + using OpRewritePattern<PackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(PackOp op, + PatternRewriter &rewriter) const override { + if (!foldTensorCastPrecondition(op)) + return failure(); + + SmallVector<Type> newResultTypes(op->getResultTypes()); + SmallVector<Value> newOperands = getNewOperands(op, newResultTypes); + + // Get the updated mixed-tile-sizes attribute. + SmallVector<OpFoldResult> newMixedTileSizes = + getNewMixedTileSizes(rewriter, newResultTypes[0], op.getMixedTiles()); + + // Clone op. + // TODO: Strictly speaking, discardable attributes should be _discarded_ at + // this point. However, in practice, we use them for things that we'd like + // to preserve. Implement a better abstraction. + PackOp newOp = rewriter.create<PackOp>( + op.getLoc(), newOperands[0], newOperands[1], op.getInnerDimsPos(), + newMixedTileSizes, op.getPaddingValue(), op.getOuterDimsPerm()); + newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary()); + + // Replace op. + Value oldResult = op.getResult(); + Value newResult = newOp.getResult(); + Value replacement = (newResult.getType() != oldResult.getType()) + ? rewriter.create<tensor::CastOp>( + op->getLoc(), oldResult.getType(), newResult) + : newResult; + + rewriter.replaceOp(op, {replacement}); + + return success(); + } +}; + //===----------------------------------------------------------------------===// // UnPackOp //===----------------------------------------------------------------------===// @@ -4972,5 +5086,74 @@ OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) { return {}; } +/// Folds a tensor.cast op into a consuming UnPackOp if the +/// `tensor.cast` has a source that is more static than the consuming op. +/// +/// Example: +/// ```mlir +/// %1 = tensor.cast %0 : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> +/// %2 = linalg.unpack %1 ... : tensor<1x1x?x1xi32> -> tensor<7x?xi32> +/// ``` +/// +/// folds into: +/// +/// ```mlir +/// %2 = linalg.unpack %0 ... tensor<1x1x8x1xi32> -> tensor<7x?xi32> +/// ``` +struct FoldTensorCastUnPackOp : public OpRewritePattern<UnPackOp> { + using OpRewritePattern<UnPackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(UnPackOp op, + PatternRewriter &rewriter) const override { + if (!foldTensorCastPrecondition(op)) + return failure(); + + SmallVector<Type> newResultTypes(op->getResultTypes()); + SmallVector<Value> newOperands = getNewOperands(op, newResultTypes); + Value sourceTensor = newOperands[0]; + + // Get the updated mixed-tile-sizes attribute. + SmallVector<OpFoldResult> newMixedTileSizes = getNewMixedTileSizes( + rewriter, sourceTensor.getType(), op.getMixedTiles()); + + // Clone op. + // TODO: Strictly speaking, discardable attributes should be _discarded_ at + // this point. However, in practice, we use them for things that we'd like + // to preserve. Implement a better abstraction. + UnPackOp newOp = rewriter.create<UnPackOp>( + op.getLoc(), sourceTensor, newOperands[1], op.getInnerDimsPos(), + newMixedTileSizes, op.getOuterDimsPerm()); + newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary()); + + // Replace op. + Value oldResult = op.getResult(); + Value newResult = newOp.getResult(); + Value replacement = (newResult.getType() != oldResult.getType()) ?
rewriter.create<tensor::CastOp>( + op->getLoc(), oldResult.getType(), newResult) + : newResult; + + rewriter.replaceOp(op, {replacement}); + + return success(); + } +}; + } // namespace linalg } // namespace mlir + +//===----------------------------------------------------------------------===// +// LinalgDialect +//===----------------------------------------------------------------------===// + +void LinalgDialect::getCanonicalizationPatterns( RewritePatternSet &results) const { + results.add<EraseDeadLinalgOp, FoldTensorCastConsumerOp, + InferStaticShapeOfOperands>(getContext()); +} + +Operation *LinalgDialect::materializeConstant(OpBuilder &builder, Attribute value, Type type, Location loc) { + return arith::ConstantOp::materialize(builder, value, type, loc); +} diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 51d1df52598c7..2f54e780093a2 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -267,6 +267,16 @@ void transform::ApplyPadVectorizationPatternsOp::populatePatterns( linalg::populatePadOpVectorizationPatterns(patterns); } +void transform::ApplyFoldIntoPackAndUnpackPatternsOp::populatePatterns( RewritePatternSet &patterns) { + linalg::populateFoldIntoPackAndUnpackPatterns(patterns); +} + +void transform::ApplyFoldPackUnpackIntoEmptyPatternsOp::populatePatterns( RewritePatternSet &patterns) { + linalg::populateFoldPackUnpackIntoTensorEmptyPatterns(patterns); +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// @@ -1170,7 +1180,7 @@ LogicalResult transform::InterchangeOp::verify() { //===----------------------------------------------------------------------===// DiagnosedSilenceableFailure transform::LowerPackOp::applyToOne( - transform::TransformRewriter &rewriter, tensor::PackOp target, + transform::TransformRewriter &rewriter, linalg::PackOp target, transform::ApplyToEachResultList &transformResults, transform::TransformState &state) { rewriter.setInsertionPoint(target); @@ -1192,7 +1202,7 @@ DiagnosedSilenceableFailure transform::LowerPackOp::applyToOne( //===----------------------------------------------------------------------===// DiagnosedSilenceableFailure transform::LowerUnPackOp::applyToOne( - transform::TransformRewriter &rewriter, tensor::UnPackOp target, + transform::TransformRewriter &rewriter, linalg::UnPackOp target, transform::ApplyToEachResultList &transformResults, transform::TransformState &state) { rewriter.setInsertionPoint(target); @@ -1622,7 +1632,7 @@ bool isValidPackingPermutation( RelayoutOpTy op, ArrayRef<int64_t> permutation, OuterOrInnerPerm outerOrInnerPerm = OuterOrInnerPerm::Outer) { static_assert( - llvm::is_one_of<RelayoutOpTy, tensor::PackOp, tensor::UnPackOp>::value, + llvm::is_one_of<RelayoutOpTy, linalg::PackOp, linalg::UnPackOp>::value, "applies to only pack or unpack operations"); if (!op || permutation.empty()) return true; if (outerOrInnerPerm == OuterOrInnerPerm::Inner) return permutation.size() == innerRank && isPermutationVector(permutation); // op.getOuterDimsPerm() may be empty, in which case it is identity. // Don't rely on it. - if (std::is_same<RelayoutOpTy, tensor::PackOp>::value) { + if (std::is_same<RelayoutOpTy, linalg::PackOp>::value) { return permutation.size() == op.getSourceRank() && isPermutationVector(permutation); } @@ -1665,11 +1675,11 @@ transform::PackTransposeOp::apply(transform::TransformRewriter &rewriter, } // Step 2.2. Fail on wrong type.
- auto packOp = dyn_cast<tensor::PackOp>(*packOrUnpackOps.begin()); - auto unPackOp = dyn_cast<tensor::UnPackOp>(*packOrUnpackOps.begin()); + auto packOp = dyn_cast<linalg::PackOp>(*packOrUnpackOps.begin()); + auto unPackOp = dyn_cast<linalg::UnPackOp>(*packOrUnpackOps.begin()); if ((!packOp && !unPackOp)) { return emitSilenceableError() << "requires target to map to a " - "tensor.pack or tensor.unpack"; + "linalg.pack or linalg.unpack"; } LinalgOp linalgOpTarget = dyn_cast<LinalgOp>(*linalgOps.begin()); if (!linalgOpTarget) @@ -1694,7 +1704,7 @@ transform::PackTransposeOp::apply(transform::TransformRewriter &rewriter, assert(!packOp && "packOp must be null on entry when unPackOp is not null"); OpOperand *packUse = linalgOp.getDpsInitOperand( cast<OpResult>(unPackOp.getSource()).getResultNumber()); - packOp = dyn_cast_or_null<tensor::PackOp>(packUse->get().getDefiningOp()); + packOp = dyn_cast_or_null<linalg::PackOp>(packUse->get().getDefiningOp()); if (!packOp || !packOp.getResult().hasOneUse()) return emitSilenceableError() << "could not find matching pack op"; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp index 7f9a0f7a6ca43..81842e4bea631 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp @@ -88,7 +88,7 @@ static bool validateFullTilesOnDims(linalg::LinalgOp linalgOp, /// Return failure or packed matmul with one of its operands transposed. static FailureOr<PackTransposeResult> transposePackedMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp, - tensor::PackOp packOp, AffineMap operandMap, + linalg::PackOp packOp, AffineMap operandMap, ArrayRef<int64_t> blocksStartDimPos, bool transposeOuterBlocks, bool transposeInnerBlocks) { assert(operandMap.getNumDims() >= 4 && diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 3594b08413812..d18b6f8afc43b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -26,6 +26,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms MeshShardingInterfaceImpl.cpp NamedOpConversions.cpp BlockPackMatmul.cpp + PackAndUnpackPatterns.cpp Padding.cpp Promotion.cpp RuntimeOpVerification.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp index d79399b6588be..d826f72afa1c1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -61,7 +61,7 @@ template <typename OpTy> static FailureOr<PackInfo> getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp, OpTy packOrUnPackOp) { - static_assert(llvm::is_one_of<OpTy, tensor::PackOp, tensor::UnPackOp>::value, + static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value, "applies to only pack or unpack operations"); LLVM_DEBUG( { llvm::dbgs() << "--- Construct PackInfo From an operand ---\n"; }); @@ -210,7 +210,7 @@ static SmallVector<int64_t> computeOuterDims(ArrayRef<int64_t> perm, /// %4 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %4 : f32 /// } -> tensor -/// %1 = tensor.pack %0 +/// %1 = linalg.pack %0 /// inner_dims_pos = [0, 1] /// inner_tiles = [8, 2] /// into %dest : tensor -> tensor @@ -219,7 +219,7 @@ static SmallVector<int64_t> computeOuterDims(ArrayRef<int64_t> perm, /// 8. Thus, the below operation and `affine_map<(d0, d1, d2, d3)> -> /// affine_map<(d1, d3)>` will be returned.
/// -/// %pack = tensor.pack %arg0 +/// %pack = linalg.pack %arg0 /// inner_dims_pos = [0] /// inner_tiles = [8] /// into %init : tensor -> tensor @@ -290,9 +290,9 @@ getOrCreatePackedViewOfOperand(OpBuilder &b, Location loc, PackInfo packInfo, if (innerDimsPos.empty() && outerDimsPerm.empty()) return std::make_tuple(opOperand->get(), indexingMap); - auto empty = tensor::PackOp::createDestinationTensor( + auto empty = linalg::PackOp::createDestinationTensor( b, loc, opOperand->get(), innerTileSizes, innerDimsPos, outerDimsPerm); - auto packedOperand = b.create<tensor::PackOp>( + auto packedOperand = b.create<linalg::PackOp>( loc, opOperand->get(), empty, innerDimsPos, innerTileSizes, /*padding=*/std::nullopt, outerDimsPerm); return std::make_tuple(packedOperand, indexingMap); @@ -327,7 +327,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, return newGenericOp; } -/// Bubbles up tensor.pack op through a producer generic op. This +/// Bubbles up linalg.pack op through a producer generic op. This /// swaps pack(generic) to generic(pack). The new generic op works on packed /// domain; pack ops are created for input and output operands. E.g., /// @@ -343,7 +343,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, /// %4 = arith.addf %arg3, %arg3 : f32 /// linalg.yield %4 : f32 /// } -> tensor -/// %4 = tensor.pack %3 +/// %4 = linalg.pack %3 /// inner_dims_pos = [0, 1] /// inner_tiles = [8, 2] /// into %dest : tensor -> tensor @@ -358,7 +358,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, /// %0 = affine.apply #map()[%dim] /// %1 = affine.apply #map1()[%dim_0] /// %2 = tensor.empty(%0, %1) : tensor -/// %pack = tensor.pack %arg0 +/// %pack = linalg.pack %arg0 /// inner_dims_pos = [0, 1] /// inner_tiles = [8, 2] /// into %2 : tensor -> tensor @@ -371,7 +371,7 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, /// linalg.yield %4 : f32 /// } -> tensor static FailureOr<GenericOp> -bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp, +bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, linalg::PackOp packOp, const ControlPropagationFn &controlFn) { auto genericOp = packOp.getSource().getDefiningOp<GenericOp>(); if (!genericOp) return failure(); @@ -404,11 +404,11 @@ bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp, rewriter.setInsertionPoint(genericOp); // We need to handle two cases: - // 1) The tensor.pack destination is a tensor.empty. If this is the case, we + // 1) The linalg.pack destination is a tensor.empty. If this is the case, we // create a new tensor.empty to avoid breaking dominance, as we are moving the - // tensor.pack above the linalg.generic. + // linalg.pack above the linalg.generic. // 2) The destination is not a tensor.empty. In this case we can replace only - // if the destination of the tensor.pack dominates the linalg.generic. + // if the destination of the linalg.pack dominates the linalg.generic. Value packOpDest = packOp.getDest(); if (!packOpDest.hasOneUse()) return failure(); @@ -453,13 +453,13 @@ bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, tensor::PackOp packOp, /// Wrapper pattern that applies the bubbleUpPackOpThroughGenericOp method.
struct BubbleUpPackOpThroughGenericOpPattern - : public OpRewritePattern<tensor::PackOp> { + : public OpRewritePattern<linalg::PackOp> { public: BubbleUpPackOpThroughGenericOpPattern(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {} + : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { auto genericOp = bubbleUpPackOpThroughGenericOp(rewriter, packOp, controlFn); @@ -473,15 +473,15 @@ struct BubbleUpPackOpThroughGenericOpPattern ControlPropagationFn controlFn; }; -/// Propagate a tensor.pack operation up through a tensor.pad. The idea is to +/// Propagate a linalg.pack operation up through a tensor.pad. The idea is to /// add as many zero padding dimensions in `high` and `low` based on the number /// of point loops. -class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> { +class BubbleUpPackThroughPadOp final : public OpRewritePattern<linalg::PackOp> { public: BubbleUpPackThroughPadOp(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {} + : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>(); if (!padOp) return failure(); @@ -522,10 +522,10 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> { ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm(); SmallVector<OpFoldResult> mixedTiles = packOp.getMixedTiles(); - auto empty = tensor::PackOp::createDestinationTensor( + auto empty = linalg::PackOp::createDestinationTensor( rewriter, loc, padOp.getSource(), mixedTiles, innerDimsPos, outerDimsPerm); - auto sourcePack = rewriter.create<tensor::PackOp>( + auto sourcePack = rewriter.create<linalg::PackOp>( loc, padOp.getSource(), empty, innerDimsPos, mixedTiles, /*padding=*/std::nullopt, outerDimsPerm); @@ -549,9 +549,9 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> { // If the pad has more than one user, create an unpack on the new pad to // replace the other uses.
if (!padOp->hasOneUse()) { - auto unpackEmpty = tensor::UnPackOp::createDestinationTensor( + auto unpackEmpty = linalg::UnPackOp::createDestinationTensor( rewriter, loc, newPadOp, mixedTiles, innerDimsPos, outerDimsPerm); - Value unpackedPad = rewriter.create<tensor::UnPackOp>( + Value unpackedPad = rewriter.create<linalg::UnPackOp>( loc, newPadOp, unpackEmpty, innerDimsPos, mixedTiles, outerDimsPerm); rewriter.replaceAllUsesExcept(padOp, unpackedPad, sourcePack); } @@ -636,20 +636,20 @@ static int64_t applyPermutationAndReindexReassoc( /// /// %collapsed = tensor.collapse_shape %in [[0, 1], 2] /// : tensor into tensor -/// %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] +/// %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] /// inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %empty /// : tensor -> tensor /// /// can be transformed into: /// -/// %pack = tensor.pack %in outer_dims_perm = [1, 2] +/// %pack = linalg.pack %in outer_dims_perm = [1, 2] /// inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %empty /// : tensor -> tensor /// %collapsed = tensor.collapse_shape %pack [[0, 1], 2, 3, 4] /// : tensor into tensor static LogicalResult bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp, - tensor::PackOp packOp, + linalg::PackOp packOp, PatternRewriter &rewriter) { SmallVector<int64_t> innerTileSizes = packOp.getStaticTiles(); ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos(); ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm(); @@ -682,10 +682,10 @@ bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp, reassocIndices[outerPos].end()); } - auto emptyOp = tensor::PackOp::createDestinationTensor( + auto emptyOp = linalg::PackOp::createDestinationTensor( rewriter, packOp.getLoc(), collapseOp.getSrc(), packOp.getMixedTiles(), projectedInnerDimsPos, newOuterDimsPerm); - auto newPackOp = rewriter.create<tensor::PackOp>( + auto newPackOp = rewriter.create<linalg::PackOp>( packOp.getLoc(), collapseOp.getSrc(), emptyOp, projectedInnerDimsPos, packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPerm); @@ -742,20 +742,20 @@ projectDimsPosIntoReassocPos(ArrayRef<int64_t> dimsPos, /// /// %expand = tensor.expand_shape %in [[0], [1, 2]] /// : tensor into tensor -/// %pack = tensor.pack %expand outer_dims_perm = [0, 1] +/// %pack = linalg.pack %expand outer_dims_perm = [0, 1] /// inner_dims_pos = [2] inner_tiles = [8] into %empty /// : tensor -> tensor /// /// can be transformed into: /// -/// %pack = tensor.pack %in outer_dims_perm = [1, 2] +/// %pack = linalg.pack %in outer_dims_perm = [1, 2] /// inner_dims_pos = [1] inner_tiles = [8] into %empty /// : tensor -> tensor /// %expand = tensor.expand_shape %pack [[0], [1, 2], [3]] /// : tensor into tensor static LogicalResult bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, - tensor::PackOp packOp, + linalg::PackOp packOp, PatternRewriter &rewriter) { // Outer dimensions permutation is not supported currently. // TODO: Handle outer_dims_perm variants. @@ -808,7 +808,7 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, // If reassociation is not possible, then reordering cannot happen. // This can be caused by pack padding affecting previously expanded // dimensions or packing extending dimensions.
- RankedTensorType newPackType = tensor::PackOp::inferPackedType( + RankedTensorType newPackType = linalg::PackOp::inferPackedType( expandOp.getSrcType(), packOp.getStaticInnerTiles(), projectedInnerDimsPos, /*outerDimsPerm=*/SmallVector<int64_t>{}); auto reassocExpand = @@ -817,10 +817,10 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, return rewriter.notifyMatchFailure( packOp, "could not reassociate dims after bubbling up"); - Value destTensor = tensor::PackOp::createDestinationTensor( + Value destTensor = linalg::PackOp::createDestinationTensor( rewriter, packOp.getLoc(), expandOp.getSrc(), packOp.getMixedTiles(), projectedInnerDimsPos, /*outerDimsPerm=*/SmallVector<int64_t>{}); - Value packedVal = rewriter.create<tensor::PackOp>( + Value packedVal = rewriter.create<linalg::PackOp>( packOp.getLoc(), expandOp.getSrc(), destTensor, projectedInnerDimsPos, packOp.getMixedTiles(), packOp.getPaddingValue(), /*outerDimsPerm=*/SmallVector<int64_t>{}); @@ -833,12 +833,12 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp, } class BubbleUpPackOpThroughReshapeOp final - : public OpRewritePattern<tensor::PackOp> { + : public OpRewritePattern<linalg::PackOp> { public: BubbleUpPackOpThroughReshapeOp(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {} + : OpRewritePattern<linalg::PackOp>(context), controlFn(std::move(fun)) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { Operation *srcOp = packOp.getSource().getDefiningOp(); // Currently only support when the pack op is the only user. @@ -877,7 +877,7 @@ class BubbleUpPackOpThroughReshapeOp final /// /// For example: /// -/// %unpack = tensor.unpack %in outer_dims_perm = [0, 1] +/// %unpack = linalg.unpack %in outer_dims_perm = [0, 1] /// inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %empty /// : tensor -> tensor /// %expanded = tensor.expand_shape %unpack [[0, 1], [2]] /// : tensor into tensor /// /// can be transformed into: /// /// %expanded = tensor.expand_shape %ain [[0, 1], [2], [3], [4]] /// : tensor into tensor -/// %unpack = tensor.unpack %expanded outer_dims_perm = [0, 1, 2] +/// %unpack = linalg.unpack %expanded outer_dims_perm = [0, 1, 2] /// inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %empty /// : tensor -> tensor static LogicalResult pushDownUnPackOpThroughExpandShape( - tensor::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp, + linalg::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp, PatternRewriter &rewriter, ControlPropagationFn controlFn) { // User controlled propagation function.
if (!controlFn(&expandOp.getSrcMutable())) return failure(); @@ -943,16 +943,16 @@ static LogicalResult pushDownUnPackOpThroughExpandShape( nextPos += 1; } - RankedTensorType newExpandType = tensor::PackOp::inferPackedType( + RankedTensorType newExpandType = linalg::PackOp::inferPackedType( expandTy, innerTileSizes, projectedInnerDimsPos, newOuterDimsPerm); auto newExpandOp = rewriter.create<tensor::ExpandShapeOp>( expandOp.getLoc(), newExpandType, unPackOp.getSource(), newReassocIndices); - auto emptyOp = tensor::UnPackOp::createDestinationTensor( + auto emptyOp = linalg::UnPackOp::createDestinationTensor( rewriter, unPackOp.getLoc(), newExpandOp, unPackOp.getMixedTiles(), projectedInnerDimsPos, newOuterDimsPerm); - auto newUnPackOp = rewriter.create<tensor::UnPackOp>( + auto newUnPackOp = rewriter.create<linalg::UnPackOp>( unPackOp.getLoc(), newExpandOp.getResult(), emptyOp, projectedInnerDimsPos, unPackOp.getMixedTiles(), newOuterDimsPerm); rewriter.replaceOp(expandOp, newUnPackOp); @@ -961,14 +961,14 @@ static LogicalResult pushDownUnPackOpThroughExpandShape( } class PushDownUnPackOpThroughReshapeOp final - : public OpRewritePattern<tensor::UnPackOp> { + : public OpRewritePattern<linalg::UnPackOp> { public: PushDownUnPackOpThroughReshapeOp(MLIRContext *context, ControlPropagationFn fun) - : OpRewritePattern<tensor::UnPackOp>(context), controlFn(std::move(fun)) { + : OpRewritePattern<linalg::UnPackOp>(context), controlFn(std::move(fun)) { } - LogicalResult matchAndRewrite(tensor::UnPackOp unPackOp, + LogicalResult matchAndRewrite(linalg::UnPackOp unPackOp, PatternRewriter &rewriter) const override { Value result = unPackOp.getResult(); // Currently only support unpack op with the single user. @@ -1001,7 +1001,7 @@ class PushDownUnPackOpThroughReshapeOp final static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) { OpOperand *unPackedOperand = nullptr; for (OpOperand &operand : genericOp->getOpOperands()) { - auto unPackOp = operand.get().getDefiningOp<tensor::UnPackOp>(); + auto unPackOp = operand.get().getDefiningOp<linalg::UnPackOp>(); if (!unPackOp) continue; if (unPackedOperand) @@ -1013,9 +1013,9 @@ static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) { return unPackedOperand; } -/// Push down a tensor.unpack op through a generic op. +/// Push down a linalg.unpack op through a generic op. /// The new generic op works on packed domain; pack ops are created for input -/// and output operands. A tensor.unpack op is inserted right after the packed +/// and output operands. A linalg.unpack op is inserted right after the packed /// generic. E.g. /// /// #map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> /// /// %arg0 = tensor<12x2x56x56x32xf32> // packed arg. /// /// %0 = tensor.empty() : tensor<12x56x56x64xf32> -/// %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] +/// %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] /// inner_dims_pos = [3] inner_tiles = [32] into %0 /// %2 = linalg.generic {indexing_maps = [#map], /// iterator_types = ["parallel", "parallel", "parallel", "parallel"]} /// ins(...) outs(...) @@ -1044,7 +1044,7 @@ static FailureOr<OpOperand *> getUnPackedOperand(GenericOp genericOp) { /// ^bb0(%out : f32): /// linalg.yield %out : f32 /// } -> tensor<12x2x56x56x32xf32> -/// %2 = tensor.unpack %1 outer_dims_perm = [0, 3, 1, 2] +/// %2 = linalg.unpack %1 outer_dims_perm = [0, 3, 1, 2] /// inner_dims_pos = [3] inner_tiles = [32] into %0 /// static FailureOr<std::tuple<GenericOp, Value>> @@ -1063,8 +1063,8 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp, OpOperand *unPackedOperand = *(maybeUnPackedOperand); // Extract packing information.
- tensor::UnPackOp producerUnPackOp = - unPackedOperand->get().getDefiningOp<tensor::UnPackOp>(); + linalg::UnPackOp producerUnPackOp = + unPackedOperand->get().getDefiningOp<linalg::UnPackOp>(); assert(producerUnPackOp && "expect a valid UnPackOp"); if (!controlFn(unPackedOperand)) return failure(); @@ -1079,7 +1079,7 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp, auto [packedOutOperand, packedOutIndexingMap] = getOrCreatePackedViewOfOperand(rewriter, genericOp.getLoc(), *packInfo, genericOp, genericOp.getDpsInitOperand(0)); - auto destPack = packedOutOperand.getDefiningOp<tensor::PackOp>(); + auto destPack = packedOutOperand.getDefiningOp<linalg::PackOp>(); // If the dps init operand of the generic is a tensor.empty, do not pack it // and forward the new tensor.empty as a destination. @@ -1108,7 +1108,7 @@ pushDownUnPackOpThroughGenericOp(RewriterBase &rewriter, GenericOp genericOp, // Insert an unPackOp right after the packed generic. Value unPackOpRes = rewriter - .create<tensor::UnPackOp>(genericOp.getLoc(), newResult, + .create<linalg::UnPackOp>(genericOp.getLoc(), newResult, destPack.getSource(), innerDimsPos, mixedTiles, outerDimsPerm) .getResult(); @@ -1137,7 +1137,7 @@ struct PushDownUnPackOpThroughGenericOp : public OpRewritePattern<GenericOp> { ControlPropagationFn controlFn; }; -/// Propagate a tensor.unpack operation through a tensor.pad. The idea is to +/// Propagate a linalg.unpack operation through a tensor.pad. The idea is to /// add as many zero padding dimensions in `high` and `low` based on the number /// of point loops. struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> { using OpRewritePattern<tensor::PadOp>::OpRewritePattern; LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override { - tensor::UnPackOp unpackOp = - padOp.getSource().getDefiningOp<tensor::UnPackOp>(); + linalg::UnPackOp unpackOp = + padOp.getSource().getDefiningOp<linalg::UnPackOp>(); if (!unpackOp) return failure(); @@ -1185,12 +1185,12 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> { loc, /*result=*/Type(), unpackOp.getSource(), lowPad, highPad, paddingVal, padOp.getNofold()); - // Inject the tensor.unpack right after the packed padOp. + // Inject the linalg.unpack right after the packed padOp. Value outputUnPack = rewriter.create<tensor::EmptyOp>( loc, padOp.getResultType().getShape(), padOp.getResultType().getElementType()); - Value replacement = rewriter.create<tensor::UnPackOp>( + Value replacement = rewriter.create<linalg::UnPackOp>( loc, newPadOp.getResult(), outputUnPack, innerDimsPos, unpackOp.getMixedTiles(), outerDimsPerm); rewriter.replaceOp(padOp, replacement); diff --git a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp similarity index 90% rename from mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp rename to mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp index 3566714c6529e..0984b6988b93b 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/PackAndUnpackPatterns.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp @@ -13,7 +13,7 @@ #include "mlir/IR/PatternMatch.h" namespace mlir { -namespace tensor { +namespace linalg { namespace { /// Returns the number of shape sizes that is either dynamic or greater than 1.
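As a concrete illustration of the folds collected in this renamed file, here is a rough before/after sketch of the `FoldPadWithPackOp` rewrite (hypothetical shapes; it assumes zero low padding and that the pad constant matches the pack's `padding_value`, mirroring the checks in the pattern below):

```mlir
// Before: an explicit pad feeding a pack that uses the same constant.
%cst = arith.constant 0.0 : f32
%padded = tensor.pad %src low[0, 0] high[2, 3] {
^bb0(%i: index, %j: index):
  tensor.yield %cst : f32
} : tensor<30x61xf32> to tensor<32x64xf32>
%packed = linalg.pack %padded padding_value(%cst : f32)
    inner_dims_pos = [0, 1] inner_tiles = [4, 8]
    into %dest : tensor<32x64xf32> -> tensor<8x8x4x8xf32>

// After: the pad is absorbed into the pack, which pads the
// incomplete tiles itself.
%packed = linalg.pack %src padding_value(%cst : f32)
    inner_dims_pos = [0, 1] inner_tiles = [4, 8]
    into %dest : tensor<30x61xf32> -> tensor<8x8x4x8xf32>
```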
@@ -201,7 +201,7 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> { LogicalResult matchAndRewrite(PackOp packOp, PatternRewriter &rewriter) const override { - auto padOp = packOp.getSource().getDefiningOp<PadOp>(); + auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>(); if (!padOp || padOp.getNofold() || !padOp.hasZeroLowPad()) return failure(); @@ -224,10 +224,11 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> { /// Fold an `unpack` -> `extract_slice` into the `unpack` since it already /// has extract_slice semantics. -struct FoldUnpackWithExtractSliceOp : public OpRewritePattern<ExtractSliceOp> { - using OpRewritePattern<ExtractSliceOp>::OpRewritePattern; +struct FoldUnpackWithExtractSliceOp + : public OpRewritePattern<tensor::ExtractSliceOp> { + using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern; - LogicalResult matchAndRewrite(ExtractSliceOp sliceOp, + LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const override { auto unpackOp = sliceOp.getSource().getDefiningOp<UnPackOp>(); if (!unpackOp) @@ -247,7 +248,7 @@ struct FoldUnpackWithExtractSliceOp : public OpRewritePattern<tensor::ExtractSliceOp> { // Create a new empty output tensor. Type elementType = unpackOp.getDestType().getElementType(); - Value output = rewriter.create<EmptyOp>( + Value output = rewriter.create<tensor::EmptyOp>( sliceOp.getLoc(), sliceOp.getMixedSizes(), elementType); rewriter.replaceOpWithNewOp<UnPackOp>( sliceOp, unpackOp.getSource(), output, unpackOp.getInnerDimsPos(), @@ -474,6 +475,50 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp return success(); } }; + +/// tensor.empty does not define any tensor contents, so an unpadded pack +/// can be folded away. +struct FoldEmptyTensorWithPackOp : public OpRewritePattern<PackOp> { + using OpRewritePattern<PackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(PackOp packOp, + PatternRewriter &rewriter) const override { + // Check for tensor.empty source. + auto emptyOp = packOp.getSource().getDefiningOp<tensor::EmptyOp>(); + if (!emptyOp) + return failure(); + + // Check for padding. + // Packing with padding cannot be simply removed. + if (packOp.getPaddingValue()) + return rewriter.notifyMatchFailure(packOp, "expects no padding value"); + + // Replace the pack directly with its destination. + rewriter.replaceOp(packOp, packOp.getDest()); + + return success(); + } +}; + +/// tensor.empty does not define any tensor contents, so an unpack +/// can be folded away. +struct FoldEmptyTensorWithUnPackOp : public OpRewritePattern<UnPackOp> { + using OpRewritePattern<UnPackOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(UnPackOp unPackOp, + PatternRewriter &rewriter) const override { + // Check for tensor.empty source. + auto emptyOp = unPackOp.getSource().getDefiningOp<tensor::EmptyOp>(); + if (!emptyOp) + return failure(); + + // Replace the unpack directly with its destination.
+ rewriter.replaceOp(unPackOp, unPackOp.getDest()); + + return success(); + } +}; + } // namespace void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns) { @@ -490,5 +535,11 @@ void populateSimplifyPackAndUnpackPatterns(RewritePatternSet &patterns) { patterns.getContext()); } -} // namespace tensor +void populateFoldPackUnpackIntoTensorEmptyPatterns( RewritePatternSet &patterns) { + patterns.add<FoldEmptyTensorWithPackOp, FoldEmptyTensorWithUnPackOp>( + patterns.getContext()); +} + +} // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index b7764da26a7f4..faa7bbf9d168a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -10,14 +10,17 @@ #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/Utils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Interfaces/TilingInterface.h" +#include "mlir/Interfaces/ValueBoundsOpInterface.h" #include <optional> using namespace mlir; @@ -563,6 +566,648 @@ struct LinalgOpPartialReductionInterface } }; +template <typename OpTy> +static SmallVector<Range> getPackUnPackIterationDomain(OpTy op, + OpBuilder &builder) { + static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value, + "applies to only pack or unpack operations"); + OpBuilder::InsertionGuard g(builder); + int64_t rank = (std::is_same<OpTy, PackOp>::value) ? op.getSourceRank() + : op.getDestRank(); + OpFoldResult zero = builder.getIndexAttr(0); + OpFoldResult one = builder.getIndexAttr(1); + ReifiedRankedShapedTypeDims resultShape; + (void)reifyResultShapes(builder, op, resultShape); + SmallVector<Range> loopBounds(rank); + for (auto dim : llvm::seq<int64_t>(0, rank)) { + loopBounds[dim].offset = zero; + loopBounds[dim].stride = one; + loopBounds[dim].size = resultShape[0][dim]; + } + return loopBounds; +} + +static void applyPermToRange(SmallVector<OpFoldResult> &offsets, + SmallVector<OpFoldResult> &sizes, + ArrayRef<int64_t> permutation) { + if (permutation.empty()) + return; + applyPermutationToVector(offsets, permutation); + applyPermutationToVector(sizes, permutation); +} + +struct PackOpTiling + : public TilingInterface::ExternalModel<PackOpTiling, linalg::PackOp> { + + SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const { + // Note that here we only consider untiled dimensions and outer tiled data + // dimensions, the inner tiled data dimensions are materialized when + // building the body of the operation. + auto packOp = cast<PackOp>(op); + SmallVector<utils::IteratorType> iteratorTypes( + packOp.getSourceRank(), utils::IteratorType::parallel); + return iteratorTypes; + } + + SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const { + return getPackUnPackIterationDomain<PackOp>(cast<PackOp>(op), b); + } + + FailureOr<TilingResult> + getTiledImplementation(Operation *op, OpBuilder &b, + ArrayRef<OpFoldResult> offsets, + ArrayRef<OpFoldResult> sizes) const { + auto packOp = cast<PackOp>(op); + Location loc = packOp.getLoc(); + + // The tiling is applied on interchanged dimensions. We have to undo the + // interchange to map sizes and offsets to the original input.
int64_t inputRank = packOp.getSourceRank(); + SmallVector<OpFoldResult> origOffsets(offsets); + SmallVector<OpFoldResult> origSizes(sizes); + applyPermToRange(origOffsets, origSizes, + invertPermutationVector(packOp.getOuterDimsPerm())); + + DenseMap<int64_t, OpFoldResult> dimAndTileMapping = + packOp.getDimAndTileMapping(); + SmallVector<OpFoldResult> srcDimValues = + tensor::getMixedSizes(b, loc, packOp.getSource()); + SmallVector<OpFoldResult> inputIndices, inputSizes; + for (auto dim : llvm::seq<int64_t>(0, inputRank)) { + using AV = affine::AffineValueExpr; + affine::AffineBuilder ab(b, loc); + AffineExpr dim0, dim1, sym; + bindDims(b.getContext(), dim0, dim1); + bindSymbols(b.getContext(), sym); + if (dimAndTileMapping.count(dim)) { + // If the data dimension is tiled, the i-th index is the product of + // offset_i and tile_i, and the i-th size is the product of sizes_i and + // tile_i. + auto avOffset = AV(dim0).bind(origOffsets[dim]); + auto avSize = AV(dim0).bind(origSizes[dim]); + auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]); + inputIndices.push_back(ab.mul(avOffset, avTileSize)); + inputSizes.push_back(ab.mul(avSize, avTileSize)); + } else { + inputIndices.push_back(origOffsets[dim]); + inputSizes.push_back(origSizes[dim]); + } + + // Limit the size of the input operand for incomplete tiles. + if (packOp.getPaddingValue()) { + OpFoldResult dimSize = srcDimValues[dim]; + auto avDimSize = AV(dim0).bind(dimSize); + auto avInputIdx = AV(dim1).bind(inputIndices.back()); + inputSizes.back() = + ab.min({inputSizes.back(), ab.sub(avDimSize, avInputIdx)}); + } + } + + auto oneAttr = b.getI64IntegerAttr(1); + SmallVector<OpFoldResult> strides(inputRank, oneAttr); + + SmallVector<Value> tiledOperands; + auto sourceSlice = b.create<tensor::ExtractSliceOp>( + loc, packOp.getSource(), inputIndices, inputSizes, strides); + tiledOperands.push_back(sourceSlice); + + SmallVector<OpFoldResult> outputOffsets, outputSizes; + if (failed(getResultTilePosition(op, b, 0, offsets, sizes, outputOffsets, + outputSizes))) + return {}; + + strides.append(packOp.getDestRank() - inputRank, oneAttr); + auto outSlice = b.create<tensor::ExtractSliceOp>( + loc, packOp.getDest(), outputOffsets, outputSizes, strides); + tiledOperands.push_back(outSlice); + + if (auto val = packOp.getPaddingValue()) + tiledOperands.push_back(val); + for (auto tile : packOp.getInnerTiles()) + tiledOperands.push_back(tile); + + Operation *tiledPackOp = b.create<PackOp>( + loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs()); + + return TilingResult{ + {tiledPackOp}, + SmallVector<Value>(tiledPackOp->getResults()), + llvm::to_vector(ArrayRef<Operation *>{sourceSlice, outSlice})}; + } + + LogicalResult + getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber, + ArrayRef<OpFoldResult> offsets, + ArrayRef<OpFoldResult> sizes, + SmallVector<OpFoldResult> &resultOffsets, + SmallVector<OpFoldResult> &resultSizes) const { + // The iteration domain is over outer dimensions of packed layout. In this + // context, the outer dimensions of `resultOffsets` are `offsets`. The + // inner dimensions of `resultOffsets` are zeros because tiling is not + // applied to them.
+    auto packOp = cast(op);
+    int64_t inputRank = packOp.getSourceRank();
+    int64_t outputRank = packOp.getDestRank();
+    auto zeroAttr = b.getI64IntegerAttr(0);
+    resultOffsets.assign(offsets.begin(), offsets.end());
+    resultOffsets.append(outputRank - inputRank, zeroAttr);
+
+    ReifiedRankedShapedTypeDims outputShape;
+    (void)reifyResultShapes(b, packOp, outputShape);
+    resultSizes.assign(sizes.begin(), sizes.end());
+    for (auto dataTileDim : llvm::seq(inputRank, outputRank))
+      resultSizes.push_back(outputShape[0][dataTileDim]);
+
+    return success();
+  }
+
+  FailureOr
+  generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
+                          ArrayRef offsets,
+                          ArrayRef sizes) const {
+    auto packOp = cast(op);
+    int64_t numTiles = packOp.getInnerDimsPos().size();
+
+    // The linalg.pack op is fusible (as a producer) only if full inner tiles
+    // are iterated or the inner dims are not tiled. Otherwise, it will
+    // generate a sequence of non-trivial ops (for partial tiles).
+    for (auto offset : offsets.take_back(numTiles))
+      if (!isConstantIntValue(offset, 0))
+        return failure();
+
+    for (auto iter :
+         llvm::zip_equal(packOp.getMixedTiles(), sizes.take_back(numTiles)))
+      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
+        return failure();
+
+    FailureOr tilingResult = getTiledImplementation(
+        op, b, offsets.drop_back(numTiles), sizes.drop_back(numTiles));
+    if (failed(tilingResult))
+      return failure();
+    return tilingResult.value();
+  }
+
+  /// Method to return the position of the iteration domain tile computed by
+  /// the tiled operation. In the current `linalg.pack` context, the
+  /// `resultOffsets` and `resultSizes` only cover outer dimensions.
+  LogicalResult getIterationDomainTileFromOperandTile(
+      Operation *op, OpBuilder &b, unsigned operandNumber,
+      ArrayRef offsets, ArrayRef sizes,
+      SmallVectorImpl &resultOffsets,
+      SmallVectorImpl &resultSizes) const {
+    if (operandNumber != 0)
+      return failure();
+
+    auto packOp = cast(op);
+    // It is not trivial to infer the dest tile from the source tile if
+    // `packOp` has padding semantics.
+    if (packOp.getPaddingValue())
+      return failure();
+
+    Location loc = packOp.getLoc();
+
+    SmallVector outerDimOffsets, outerDimSizes;
+    DenseMap dimAndTileMapping =
+        packOp.getDimAndTileMapping();
+    for (auto dim : llvm::seq(packOp.getSourceRank())) {
+      if (dimAndTileMapping.count(dim)) {
+        FailureOr cstSize =
+            ValueBoundsConstraintSet::computeConstantBound(
+                presburger::BoundType::UB, sizes[dim],
+                /*stopCondition=*/nullptr, /*closedUB=*/true);
+        std::optional cstInnerSize =
+            getConstantIntValue(dimAndTileMapping[dim]);
+        // Currently, fusing `packOp` as a consumer expects a perfect tiling
+        // scenario, because even without padding semantics the `packOp` may
+        // yield incomplete tiles. E.g. tensor<30xf32> -> tensor<5x6xf32>,
+        // where the `tileSize` from the operand of `packOp` is 5, which is
+        // not exactly divisible by the `innerTile` (=6) of `packOp`. As a
+        // result:
+        // 1. the first slice is extracted from (0) to (4) and inserted into
+        // (0,0)~(0,4) in the first row.
+        // 2. the second slice is extracted from (5) to (9) and SHOULD BE
+        // inserted into two rows of different lengths: the first row at (0,5)
+        // and the second row at (1,0)~(1,3). It is hard to coordinate the
+        // two, so we add the constraint below to bypass such cases
+        // temporarily. In other words, at the moment we can only support
+        // tiling with a consumer if the tile size for the producer is a
+        // multiple of the inner tile size for the packed dimensions.
+        if (failed(cstSize) || !cstInnerSize || *cstSize % *cstInnerSize != 0) {
+          return failure();
+        }
+
+        using AV = affine::AffineValueExpr;
+        affine::AffineBuilder ab(b, loc);
+        AffineExpr dim0, sym;
+        bindDims(b.getContext(), dim0);
+        bindSymbols(b.getContext(), sym);
+        auto avOffset = AV(dim0).bind(offsets[dim]);
+        auto avSize = AV(dim0).bind(sizes[dim]);
+        auto avTileSize = AV(sym).bind(dimAndTileMapping[dim]);
+        outerDimOffsets.push_back(ab.floor(avOffset, avTileSize));
+        outerDimSizes.push_back(ab.ceil(avSize, avTileSize));
+      } else {
+        outerDimOffsets.push_back(offsets[dim]);
+        outerDimSizes.push_back(sizes[dim]);
+      }
+    }
+    applyPermToRange(outerDimOffsets, outerDimSizes, packOp.getOuterDimsPerm());
+    resultOffsets = outerDimOffsets;
+    resultSizes = outerDimSizes;
+    return success();
+  }
+
+  /// Method to return the tiled implementation of linalg.pack as a consumer.
+  FailureOr getTiledImplementationFromOperandTile(
+      Operation *op, OpBuilder &b, unsigned operandNumber,
+      ArrayRef offsets, ArrayRef sizes) const {
+    if (operandNumber != 0)
+      return failure();
+
+    auto packOp = cast(op);
+    Location loc = packOp.getLoc();
+
+    int64_t inputRank = packOp.getSourceRank();
+    auto oneAttr = b.getI64IntegerAttr(1);
+    SmallVector strides(inputRank, oneAttr);
+
+    SmallVector tiledOperands;
+    auto sourceSlice = b.create(
+        loc, packOp.getSource(), offsets, sizes, strides);
+    tiledOperands.push_back(sourceSlice);
+
+    SmallVector outerDimOffsets, outerDimSizes;
+    if (failed(getIterationDomainTileFromOperandTile(
+            op, b, /*operandNumber=*/0, offsets, sizes, outerDimOffsets,
+            outerDimSizes)))
+      return failure();
+
+    SmallVector outputOffsets, outputSizes;
+    if (failed(getResultTilePosition(op, b, 0, outerDimOffsets, outerDimSizes,
+                                     outputOffsets, outputSizes)))
+      return failure();
+
+    strides.append(packOp.getDestRank() - inputRank, oneAttr);
+    auto outSlice = b.create(
+        loc, packOp.getDest(), outputOffsets, outputSizes, strides);
+    tiledOperands.push_back(outSlice);
+
+    assert(!packOp.getPaddingValue() && "Expect no padding semantic");
+    for (auto tile : packOp.getInnerTiles())
+      tiledOperands.push_back(tile);
+
+    Operation *tiledPackOp = b.create(
+        loc, TypeRange{outSlice.getType()}, tiledOperands, op->getAttrs());
+
+    return TilingResult{
+        {tiledPackOp},
+        SmallVector(tiledPackOp->getResults()),
+        llvm::to_vector(ArrayRef{sourceSlice, outSlice})};
+  }
+};
+
+struct UnpackTileDimInfo {
+  bool isAlignedToInnerTileSize;
+  OpFoldResult sourceOffset;
+  OpFoldResult sourceSize;
+  OpFoldResult resultOffset;
+  OpFoldResult destExpandedSize;
+};
+
+/// Returns the information needed for tiling the unpack op on `tileDim` with
+/// the given `tileOffset` and `tileSize`. For more details, see the comment
+/// on `getTiledImplementation`.
+static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp,
+                                              int64_t tileDim,
+                                              OpFoldResult tileOffset,
+                                              OpFoldResult tileSize) {
+  UnpackTileDimInfo info;
+  Attribute zeroAttr = b.getIndexAttr(0);
+  Attribute oneAttr = b.getIndexAttr(1);
+  DenseMap dimAndTileMapping =
+      unpackOp.getDimAndTileMapping();
+  // The dimension is not one of the packed data dimensions.
+  if (!dimAndTileMapping.count(tileDim)) {
+    info.isAlignedToInnerTileSize = true;
+    info.sourceOffset = tileOffset;
+    info.sourceSize = tileSize;
+    info.resultOffset = zeroAttr;
+    info.destExpandedSize = tileSize;
+    return info;
+  }
+
+  Location loc = unpackOp.getLoc();
+  using AV = affine::AffineValueExpr;
+  affine::AffineBuilder ab(b, loc);
+  AffineExpr dim0, dim1, sym0;
+  bindDims(b.getContext(), dim0, dim1);
+  bindSymbols(b.getContext(), sym0);
+
+  OpFoldResult innerTileSize = dimAndTileMapping[tileDim];
+
+  info.isAlignedToInnerTileSize = false;
+  FailureOr cstSize = ValueBoundsConstraintSet::computeConstantBound(
+      presburger::BoundType::UB, tileSize,
+      /*stopCondition=*/nullptr, /*closedUB=*/true);
+  std::optional cstInnerSize = getConstantIntValue(innerTileSize);
+  if (!failed(cstSize) && cstInnerSize) {
+    if (*cstSize % *cstInnerSize == 0)
+      info.isAlignedToInnerTileSize = true;
+
+    // If the tiling size equals the inner tiling size, the outer dims are
+    // always 1.
+    if (*cstInnerSize == *cstSize) {
+      auto lhs = AV(dim0).bind(tileOffset);
+      auto rhs = AV(dim1).bind(innerTileSize);
+      info.sourceOffset = ab.floor(lhs, rhs);
+      info.sourceSize = oneAttr;
+      info.resultOffset = zeroAttr;
+      info.destExpandedSize = tileSize;
+      return info;
+    }
+  }
+
+  if (info.isAlignedToInnerTileSize) {
+    info.sourceOffset =
+        ab.floor(AV(dim0).bind(tileOffset), AV(dim1).bind(innerTileSize));
+    info.resultOffset = zeroAttr;
+    info.destExpandedSize = tileSize;
+
+    // The ceilDiv is needed here because there can be an incomplete tile even
+    // in perfect tiling cases. E.g.,
+    //   %0 = unpack tensor<33x2xf32> into tensor<64xf32>
+    // If the tiling size is 32, there will be 3 tiles. Two of them have
+    // size=32; one of them has size=2. The size is represented using an
+    // affine_min op; we need ceilDiv.
+    info.sourceSize =
+        ab.ceil(AV(dim0).bind(tileSize), AV(dim1).bind(innerTileSize));
+    return info;
+  }
+
+  affine::DivModValue firstCoord = affine::getDivMod(
+      b, loc, getValueOrCreateConstantIndexOp(b, loc, tileOffset),
+      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+  OpFoldResult tileExclusiveBound =
+      ab.add(AV(dim0).bind(tileOffset), AV(dim1).bind(tileSize));
+  affine::DivModValue lastCoord = affine::getDivMod(
+      b, loc,
+      getValueOrCreateConstantIndexOp(
+          b, loc,
+          ab.sub(AV(dim0).bind(tileExclusiveBound), AV(dim1).bind(oneAttr))),
+      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+
+  OpFoldResult lengthMinusOne = ab.sub(AV(dim0).bind(lastCoord.quotient),
+                                       AV(dim1).bind(firstCoord.quotient));
+  info.sourceSize =
+      ab.add(AV(dim0).bind(lengthMinusOne), AV(dim1).bind(oneAttr));
+  info.sourceOffset = firstCoord.quotient;
+  info.resultOffset = firstCoord.remainder;
+  // Do not create affine ops for the expanded size because the resulting
+  // affine op would be too complicated and would trigger an issue in affine
+  // op simplification.
+  info.destExpandedSize = b.createOrFold(
+      loc, getValueOrCreateConstantIndexOp(b, loc, info.sourceSize),
+      getValueOrCreateConstantIndexOp(b, loc, innerTileSize));
+  return info;
+}
+
+struct UnPackOpTiling
+    : public TilingInterface::ExternalModel {
+
+  SmallVector getLoopIteratorTypes(Operation *op) const {
+    auto unpackOp = cast(op);
+    SmallVector iteratorTypes(
+        unpackOp.getDestRank(), utils::IteratorType::parallel);
+    return iteratorTypes;
+  }
+
+  SmallVector getIterationDomain(Operation *op, OpBuilder &b) const {
+    return getPackUnPackIterationDomain(cast(op), b);
+  }
+
+  /// There are two cases in tiling unpack ops. If the tiling size is aligned
+  /// to the inner tile size, the corresponding tiles of the source are all
+  /// complete. Otherwise, there are incomplete tiles. We will need to expand
+  /// the slice of the source to get complete tiles. The tiled unpack op
+  /// unpacks more data from the source, so we'll need an extract_slice op to
+  /// shift and truncate the output.
+  /// Take Nn_to_N as an example. Say that N=32, n=8, and tiling_size=15. The
+  /// coordinates of the second tile (i.e., result[15..31]) are
+  /// [(1, 7), (2, 0), (2, 1) ... (3, 6), (3, 7)]. The first row and the last
+  /// row are incomplete tiles. To represent the unpack op, we have to
+  /// complete the rows. I.e., the input coordinates would start at (1, 0) and
+  /// end at (3, 7). In this context, the tiled unpack produces (3 * n)
+  /// elements because there are 3 rows in total. Followed by a
+  /// tensor.extract_slice op, we can get the actual result.
+  FailureOr
+  getTiledImplementation(Operation *op, OpBuilder &b,
+                         ArrayRef offsets,
+                         ArrayRef sizes) const {
+    auto unpackOp = cast(op);
+    int64_t srcRank = unpackOp.getSourceRank();
+    int64_t destRank = unpackOp.getDestRank();
+    int64_t numInnerTiles = srcRank - destRank;
+    Location loc = unpackOp.getLoc();
+
+    // The perfect tiling case indicates that the tiling sizes are multiples
+    // of inner_tile_size. In this context, no extra data is needed to
+    // represent the tiled unpack op.
+    bool isPerfectTilingCase = true;
+    Attribute oneAttr = b.getIndexAttr(1);
+    SmallVector sliceSrcStrides(destRank, oneAttr);
+    SmallVector sliceSrcIndices, sliceSrcSizes;
+    SmallVector destExpandedSizes, resultOffsetsFromDest;
+    for (auto dim : llvm::seq(0, destRank)) {
+      UnpackTileDimInfo info =
+          getUnpackTileDimInfo(b, unpackOp, dim, offsets[dim], sizes[dim]);
+      if (!info.isAlignedToInnerTileSize)
+        isPerfectTilingCase = false;
+      sliceSrcIndices.push_back(info.sourceOffset);
+      sliceSrcSizes.push_back(info.sourceSize);
+      destExpandedSizes.push_back(info.destExpandedSize);
+      resultOffsetsFromDest.push_back(info.resultOffset);
+    }
+
+    // The tiling is applied on destination dimensions. We have to apply the
+    // interchange on source dimensions if outer_dims_perm is set.
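+    // E.g. (illustrative): with outer_dims_perm = [1, 0] on a 2-D dest, the
+    // source indices and sizes computed above are swapped before the inner
+    // tile dimensions are appended below.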
+ applyPermToRange(sliceSrcIndices, sliceSrcSizes, + unpackOp.getOuterDimsPerm()); + Attribute zeroAttr = b.getIndexAttr(0); + sliceSrcIndices.append(numInnerTiles, zeroAttr); + sliceSrcSizes.append(unpackOp.getMixedTiles()); + sliceSrcStrides.append(numInnerTiles, oneAttr); + SmallVector generatedSlices; + tensor::ExtractSliceOp sliceSource = b.create( + loc, unpackOp.getSource(), sliceSrcIndices, sliceSrcSizes, + sliceSrcStrides); + generatedSlices.push_back(sliceSource); + + SmallVector destStrides(destRank, oneAttr); + Value sliceDest; + if (isPerfectTilingCase) { + auto destSliceOp = b.create( + loc, unpackOp.getDest(), offsets, sizes, destStrides); + sliceDest = destSliceOp; + generatedSlices.push_back(destSliceOp); + } else { + sliceDest = b.create( + loc, destExpandedSizes, unpackOp.getDestType().getElementType()); + } + + SmallVector tiledOperands = {sliceSource.getResult(), sliceDest}; + for (auto tile : unpackOp.getInnerTiles()) + tiledOperands.push_back(tile); + + Operation *tiledUnpackOp = b.create( + loc, TypeRange{sliceDest.getType()}, tiledOperands, op->getAttrs()); + + if (isPerfectTilingCase) + return TilingResult{{tiledUnpackOp}, + SmallVector(tiledUnpackOp->getResults()), + generatedSlices}; + + auto extractSlice = b.create( + loc, tiledUnpackOp->getResult(0), resultOffsetsFromDest, sizes, + destStrides); + return TilingResult{ + {tiledUnpackOp}, {extractSlice.getResult()}, generatedSlices}; + } + + LogicalResult + getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber, + ArrayRef offsets, + ArrayRef sizes, + SmallVector &resultOffsets, + SmallVector &resultSizes) const { + resultOffsets = llvm::to_vector(offsets); + resultSizes = llvm::to_vector(sizes); + return success(); + } + + FailureOr + generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber, + ArrayRef offsets, + ArrayRef sizes) const { + FailureOr tilingResult = + getTiledImplementation(op, b, offsets, sizes); + if (failed(tilingResult)) + return failure(); + return tilingResult.value(); + } + + /// Method to return the position of iteration domain tile computed by the + /// tiled operation. + LogicalResult getIterationDomainTileFromOperandTile( + Operation *op, OpBuilder &b, unsigned operandNumber, + ArrayRef offsets, ArrayRef sizes, + SmallVectorImpl &resultOffsets, + SmallVectorImpl &resultSizes) const { + auto unPackOp = cast(op); + // If the operand tile is the dest, then no adjustment is needed. + if (operandNumber == unPackOp.getDestMutable().getOperandNumber()) { + resultOffsets = llvm::to_vector(offsets); + resultSizes = llvm::to_vector(sizes); + return success(); + } + Location loc = unPackOp.getLoc(); + + int64_t numTiles = unPackOp.getInnerDimsPos().size(); + auto destOffsets = offsets.drop_back(numTiles); + auto destSizes = sizes.drop_back(numTiles); + // The tiling is applied on interchanged dimensions. We have to undo the + // interchange to map sizes and offsets to the original input. 
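+    // E.g. (illustrative): with numTiles = 2, the trailing two inner-tile
+    // entries of `offsets` and `sizes` are dropped; the remaining outer
+    // entries are mapped through the inverse of outer_dims_perm and then
+    // scaled by the inner tile sizes below.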
+    int64_t outputRank = unPackOp.getDestRank();
+    ReifiedRankedShapedTypeDims reifiedReturnShapes;
+    if (failed(reifyResultShapes(b, unPackOp, reifiedReturnShapes)))
+      return failure();
+    SmallVector outputMixedSizes = reifiedReturnShapes.front();
+    SmallVector origOffsets(destOffsets);
+    SmallVector origSizes(destSizes);
+    applyPermToRange(origOffsets, origSizes,
+                     invertPermutationVector(unPackOp.getOuterDimsPerm()));
+
+    DenseMap dimAndTileMapping =
+        unPackOp.getDimAndTileMapping();
+
+    for (auto dim : llvm::seq(0, outputRank)) {
+      using AV = affine::AffineValueExpr;
+      affine::AffineBuilder ab(b, loc);
+      AffineExpr dim0, dim1, sym0;
+      bindDims(b.getContext(), dim0, dim1);
+      bindSymbols(b.getContext(), sym0);
+      if (dimAndTileMapping.count(dim)) {
+        // If the data dimension is tiled, the i-th index is the product of
+        // offset_i and tile_i, and the i-th size is the product of sizes_i
+        // and tile_i. The sizes must be clamped to the sizes of the unpack
+        // result.
+        auto avOffset = AV(dim0).bind(origOffsets[dim]);
+        auto avSize = AV(dim0).bind(origSizes[dim]);
+        auto avTileSize = AV(sym0).bind(dimAndTileMapping[dim]);
+        auto avResultSize = AV(dim0).bind(outputMixedSizes[dim]);
+        resultOffsets.push_back(ab.mul(avOffset, avTileSize));
+        auto avResultOffset = AV(dim1).bind(resultOffsets.back());
+        resultSizes.push_back(ab.min({ab.mul(avSize, avTileSize),
+                                      ab.sub(avResultSize, avResultOffset)}));
+      } else {
+        resultOffsets.push_back(origOffsets[dim]);
+        resultSizes.push_back(origSizes[dim]);
+      }
+    }
+    return success();
+  }
+
+  /// Method to return the tiled implementation of linalg.unpack as a
+  /// consumer.
+  FailureOr getTiledImplementationFromOperandTile(
+      Operation *op, OpBuilder &b, unsigned operandNumber,
+      ArrayRef offsets, ArrayRef sizes) const {
+    auto unPackOp = cast(op);
+    // The linalg.unpack op is fusible (as a consumer) only if the inner dims
+    // are not tiled.
+    int64_t numTiles = unPackOp.getInnerDimsPos().size();
+    for (auto iter :
+         llvm::zip_equal(unPackOp.getMixedTiles(), sizes.take_back(numTiles))) {
+      if (!isEqualConstantIntOrValue(std::get<0>(iter), std::get<1>(iter)))
+        return failure();
+    }
+
+    Location loc = unPackOp.getLoc();
+
+    // Fetch the offsets/sizes for creating the slice of the dest operand of
+    // the unpack op.
+    SmallVector outputOffsets, outputSizes;
+    if (failed(getIterationDomainTileFromOperandTile(
+            op, b, /*operandNumber=*/0, offsets, sizes, outputOffsets,
+            outputSizes)))
+      return failure();
+
+    auto oneAttr = b.getI64IntegerAttr(1);
+    int64_t outputRank = unPackOp.getDestRank();
+    SmallVector strides(outputRank, oneAttr);
+
+    SmallVector tiledOperands;
+    // Create the slice of the dest operand.
+    auto extractDestSlice = b.create(
+        loc, unPackOp.getDest(), outputOffsets, outputSizes, strides);
+    tiledOperands.push_back(extractDestSlice);
+
+    SmallVector inputOffsets, inputSizes;
+    strides.append(unPackOp.getSourceRank() - outputRank, oneAttr);
+    // Create the slice of the source operand.
+    auto extractSourceSlice = b.create(
+        loc, unPackOp.getSource(), offsets, sizes, strides);
+    tiledOperands.insert(tiledOperands.begin(), extractSourceSlice);
+    for (auto tile : unPackOp.getInnerTiles())
+      tiledOperands.push_back(tile);
+
+    // Create the tiled unpack op.
+ Operation *tiledUnPackOp = + b.create(loc, TypeRange{extractDestSlice.getType()}, + tiledOperands, op->getAttrs()); + + return TilingResult{{tiledUnPackOp}, + SmallVector(tiledUnPackOp->getResults()), + llvm::to_vector(ArrayRef{ + extractSourceSlice, extractDestSlice})}; + } +}; + } // namespace template @@ -584,8 +1229,18 @@ void mlir::linalg::registerTilingInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { registerOne(ctx); + linalg::PackOp::attachInterface(*ctx); + linalg::UnPackOp::attachInterface(*ctx); registerAll< #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >(ctx); }); } + +void mlir::linalg::registerTilingInterfaceExternalModelsForPackUnPackOps( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *dialect) { + linalg::PackOp::attachInterface(*ctx); + linalg::UnPackOp::attachInterface(*ctx); + }); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 50593b08ad74b..dcd50cc44f81b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -217,7 +217,7 @@ struct PackedOperandsDimList { } // namespace FailureOr linalg::lowerPack(RewriterBase &rewriter, - tensor::PackOp packOp, + linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice) { // 1. Filter out NYI cases. auto packedTensorType = @@ -238,7 +238,7 @@ FailureOr linalg::lowerPack(RewriterBase &rewriter, PackingMetadata packingMetadata = computePackingMetadata( packedTensorType.getRank(), packOp.getInnerDimsPos()); SmallVector packedToStripMinedShapePerm = - tensor::getPackInverseDestPerm(packOp); + getPackInverseDestPerm(packOp); // 3. Compute the stripMinedShape: this is the packed shape before any outer // or inner permutations have been applied. @@ -353,7 +353,7 @@ FailureOr linalg::lowerPack(RewriterBase &rewriter, } FailureOr -linalg::lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp, +linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice) { Location loc = unPackOp->getLoc(); OpBuilder::InsertionGuard g(rewriter); @@ -388,7 +388,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, tensor::UnPackOp unPackOp, // before any outer or inner permutations have been applied. PackingMetadata packingMetadata; SmallVector packedToStripMinedShapePerm = - tensor::getUnPackInverseSrcPerm(unPackOp, packingMetadata); + getUnPackInverseSrcPerm(unPackOp, packingMetadata); // 2. Compute the stripMinedShape: this is the packed shape without outer and // inner permutations. @@ -493,8 +493,8 @@ FailureOr linalg::pack(RewriterBase &rewriter, llvm::interleaveComma(iteratorTypes, DBGS() << "iterators: "); DBGSNL();); - SmallVector packOps; - SmallVector unPackOps; + SmallVector packOps; + SmallVector unPackOps; // Step 1. Pack each dim of the LinalgOp metadata by packedSizes[i]. 
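  // E.g. (illustrative): for a linalg.matmul with iterators (m, n, k),
  // packedSizes = [32, 16, 64] packs m by 32, n by 16 and k by 64, matching
  // the MNK blocking exercised by the block-pack-matmul tests below.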
PackedOperandsDimList listOfPackedOperandsDim; for (int64_t i = 0, e = packedSizes.size(); i < e; ++i) { @@ -545,7 +545,7 @@ FailureOr linalg::pack(RewriterBase &rewriter, inputsAndInits.push_back(operand); continue; } - Value dest = tensor::PackOp::createDestinationTensor( + Value dest = linalg::PackOp::createDestinationTensor( rewriter, loc, operand, innerPackSizes, innerPos, /*outerDimsPerm=*/{}); ShapedType operandType = cast(operand.getType()); @@ -554,11 +554,11 @@ FailureOr linalg::pack(RewriterBase &rewriter, return getConstantIntValue(tile).has_value(); }); if (areConstantTiles && operandType.hasStaticShape() && - !tensor::PackOp::requirePaddingValue( + !linalg::PackOp::requirePaddingValue( operandType.getShape(), innerPos, cast(dest.getType()).getShape(), {}, innerPackSizes)) { - packOps.push_back(rewriter.create( + packOps.push_back(rewriter.create( loc, operand, dest, innerPos, innerPackSizes)); } else { // TODO: value of the padding attribute should be determined by @@ -566,7 +566,7 @@ FailureOr linalg::pack(RewriterBase &rewriter, auto zeroAttr = rewriter.getZeroAttr(getElementTypeOrSelf(dest.getType())); Value zero = rewriter.create(loc, zeroAttr); - packOps.push_back(rewriter.create( + packOps.push_back(rewriter.create( loc, operand, dest, innerPos, innerPackSizes, zero)); } inputsAndInits.push_back(packOps.back()); @@ -586,14 +586,14 @@ FailureOr linalg::pack(RewriterBase &rewriter, // Step 4. Propagate packing to all the op results. for (OpResult result : packedLinalgOp->getResults()) { int64_t resultNum = result.getResultNumber(); - tensor::PackOp maybePackedInit = - inits[resultNum].getDefiningOp(); + linalg::PackOp maybePackedInit = + inits[resultNum].getDefiningOp(); if (!maybePackedInit) { results.push_back(result); continue; } // Build the symmetrical UnPackOp to the existing PackOp. - unPackOps.push_back(rewriter.create( + unPackOps.push_back(rewriter.create( packedLinalgOp->getLoc(), result, maybePackedInit.getSource(), maybePackedInit.getInnerDimsPos(), maybePackedInit.getMixedTiles())); results.push_back(unPackOps.back()); @@ -674,15 +674,15 @@ static LinalgOp transposeOneLinalgOperandAndReplace( } FailureOr -linalg::packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, - linalg::LinalgOp linalgOp, tensor::UnPackOp maybeUnPackOp, +linalg::packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, + linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef outerPerm, ArrayRef innerPerm) { Location loc = linalgOp.getLoc(); // Step 1. Transpose packOp. rewriter.setInsertionPoint(packOp); - tensor::PackOp transposedPackOp = + linalg::PackOp transposedPackOp = packOp.createTransposedClone(rewriter, loc, innerPerm, outerPerm); if (!packOp.getResult().hasOneUse()) @@ -733,7 +733,7 @@ linalg::packTranspose(RewriterBase &rewriter, tensor::PackOp packOp, rewriter, linalgOp, packUse, permutation, transposedPackOp.getResult()); // Step 3. Maybe transpose unPackOp. - tensor::UnPackOp transposedUnPackOp; + linalg::UnPackOp transposedUnPackOp; if (maybeUnPackOp) { OpOperand &opOperand = transposedLinalgOp->getOpOperand(packUseOperandNumber); @@ -1024,7 +1024,7 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite( /// /// This method assumes that all outer dims for this pack Op are 1. 
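 /// E.g. (illustrative): a pack producing tensor<1x1x32x64xf32> has outer
 /// dims [1, 1], satisfying this assumption.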
static Value getPackOpSourceOrPaddedSource(OpBuilder &builder, - tensor::PackOp packOp) { + linalg::PackOp packOp) { Value input = packOp.getSource(); if (!packOp.getPaddingValue()) { return input; @@ -1141,7 +1141,7 @@ getPackUnpackRankReducedPerm(ArrayRef shape, } LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite( - tensor::PackOp packOp, PatternRewriter &rewriter) const { + linalg::PackOp packOp, PatternRewriter &rewriter) const { // TODO: support the case that outer dimensions are not all 1s. A // tensor.expand_shape will be generated in this case. if (llvm::any_of(packOp.getAllOuterDims(), @@ -1242,7 +1242,7 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite( } LogicalResult DecomposeOuterUnitDimsUnPackOpPattern::matchAndRewrite( - tensor::UnPackOp unpackOp, PatternRewriter &rewriter) const { + linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const { int64_t srcRank = unpackOp.getSourceRank(); int64_t destRank = unpackOp.getDestRank(); ArrayRef srcShape = unpackOp.getSourceType().getShape(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index f2c23c49a78e8..ae04c2b6b2a5b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1499,11 +1499,11 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state, return success(); } -/// Given a tensor::PackOp, return the `dest` shape before any packing +/// Given a linalg::PackOp, return the `dest` shape before any packing /// permutations. -static SmallVector getTiledPackShape(tensor::PackOp packOp, +static SmallVector getTiledPackShape(linalg::PackOp packOp, ArrayRef destShape) { - return applyPermutation(destShape, tensor::getPackInverseDestPerm(packOp)); + return applyPermutation(destShape, linalg::getPackInverseDestPerm(packOp)); } /// Given an input, the mixed destSizes, and the vector sizes for vectorization, @@ -1558,7 +1558,7 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc, return write; } -/// Vectorize tensor::PackOp with (1) static innerTiles (2) constant +/// Vectorize linalg::PackOp with (1) static innerTiles (2) constant /// padding value and (3) input vector sizes into: /// masked_transfer_read->shape_cast->transpose->transfer_write_in_bounds /// As in the following example: @@ -1585,7 +1585,7 @@ static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc, /// determined by the result tensor shape. Also, we update the inBounds /// attribute instead of masking. static LogicalResult -vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, +vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp, ArrayRef inputVectorSizes, SmallVectorImpl &newResults) { // TODO: Introduce a parent class that will handle the insertion point update. @@ -1639,7 +1639,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, // Create TransposeOp. 
auto destPermutation = - invertPermutationVector(tensor::getPackInverseDestPerm(packOp)); + invertPermutationVector(getPackInverseDestPerm(packOp)); auto transposeOp = rewriter.create( loc, shapeCastOp.getResult(), destPermutation); @@ -1651,7 +1651,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, return success(); } -/// Vectorize a `tensor::UnPackOp` to these 4 Ops: +/// Vectorize a `linalg::UnPackOp` to these 4 Ops: /// Vector::TransferReadOp - Reads a vector from the source tensor /// vector::TransposeOp - Transpose the Source tensor /// ShapeCastOp - Reshape the data based on the target. @@ -1661,7 +1661,7 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, /// * the vector sizes are determined by the input operand and attributes, /// * update the inBounds attribute instead of masking. static LogicalResult -vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp, +vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp, ArrayRef inputVectorSizes, SmallVectorImpl &newResults) { @@ -1754,7 +1754,7 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp, PackingMetadata packMetadata; SmallVector lastDimToInsertPosPerm = - tensor::getUnPackInverseSrcPerm(unpackOp, packMetadata); + getUnPackInverseSrcPerm(unpackOp, packMetadata); ShapedType maskedOpShapedType = cast(readResult.getType()); SmallVector stripMineShape(maskedOpShapedType.getShape()); mlir::Type stripMineElemType = maskedOpShapedType.getElementType(); @@ -1887,7 +1887,7 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op, /// Need to check if the inner-tiles are static/constant. static LogicalResult -vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp, +vectorizeUnPackOpPrecondition(linalg::UnPackOp unpackOp, ArrayRef inputVectorSizes) { if (llvm::any_of(unpackOp.getInnerTiles(), [](OpFoldResult res) { @@ -2007,7 +2007,7 @@ static LogicalResult vectorizeLinalgOpPrecondition( } static LogicalResult -vectorizePackOpPrecondition(tensor::PackOp packOp, +vectorizePackOpPrecondition(linalg::PackOp packOp, ArrayRef inputVectorSizes) { auto padValue = packOp.getPaddingValue(); Attribute cstAttr; @@ -2203,10 +2203,10 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition( .Case([&](auto padOp) { return vectorizePadOpPrecondition(padOp, inputVectorSizes); }) - .Case([&](auto packOp) { + .Case([&](auto packOp) { return vectorizePackOpPrecondition(packOp, inputVectorSizes); }) - .Case([&](auto unpackOp) { + .Case([&](auto unpackOp) { return vectorizeUnPackOpPrecondition(unpackOp, inputVectorSizes); }) .Case([&](auto sliceOp) { @@ -2231,7 +2231,7 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) { } bool mlir::linalg::hasVectorizationImpl(Operation *op) { - return isa(op); } @@ -2308,18 +2308,18 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op, return vectorizeAsTensorPadOp(rewriter, padOp, inputVectorSizes, results); }) - .Case([&](auto packOp) { + .Case([&](auto packOp) { return vectorizeAsTensorPackOp(rewriter, packOp, inputVectorSizes, results); }) + .Case([&](auto unpackOp) { + return vectorizeAsTensorUnpackOp(rewriter, unpackOp, + inputVectorSizes, results); + }) .Case([&](auto sliceOp) { return vectorizeAsInsertSliceOp(rewriter, sliceOp, inputVectorSizes, results); }) - .Case([&](auto unpackOp) { - return vectorizeAsTensorUnpackOp(rewriter, unpackOp, - inputVectorSizes, results); - }) .Default([](auto) { return failure(); }); if (failed(vectorizeResult)) { 
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index d148067fe6343..d3d301ca093b1 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -142,10 +142,64 @@ static void unpackRanges(OpBuilder &builder, Location loc, //===----------------------------------------------------------------------===// // General utilities //===----------------------------------------------------------------------===// +// +/// The permutation can be obtained from two permutations: +/// a) Compute the permutation vector to move the last `numPackedDims` into +/// the `innerPosDims` of a shape of rank `rank`. +/// b) Compute the permutation vector to move outer dims if the +/// `outerPerm` parameter is not empty. +/// Apply (b) permutation on (a) permutation to get the final permutation. +static SmallVector +computePackUnPackPerm(int64_t rank, ArrayRef &innerDimsPos, + ArrayRef &outerPerm, + PackingMetadata &packingMetadata) { + int64_t numPackedDims = innerDimsPos.size(); + auto lastDims = + llvm::to_vector(llvm::seq(rank - numPackedDims, rank)); + packingMetadata = computePackingMetadata(rank, innerDimsPos); + SmallVector innerPositionsPerm = + computePermutationVector(rank, lastDims, packingMetadata.insertPositions); + + SmallVector outerPos = packingMetadata.outerPositions; + if (!outerPerm.empty()) + applyPermutationToVector(outerPos, outerPerm); + SmallVector outerPositionPerm = + computePermutationVector(rank, packingMetadata.outerPositions, outerPos); + + SmallVector packInverseDestPermutation = innerPositionsPerm; + applyPermutationToVector(packInverseDestPermutation, outerPositionPerm); + return packInverseDestPermutation; +} namespace mlir { namespace linalg { +SmallVector getPackInverseDestPerm(PackOp packOp) { + + PackingMetadata pMetadata; + int64_t packedRank = packOp.getDestType().getRank(); + ArrayRef innerDimPos = packOp.getInnerDimsPos(); + ArrayRef outerPerm = packOp.getOuterDimsPerm(); + SmallVector packInvDestPerm = + computePackUnPackPerm(packedRank, innerDimPos, outerPerm, pMetadata); + return packInvDestPerm; +} + +SmallVector getUnPackInverseSrcPerm(UnPackOp unpackOp) { + PackingMetadata metadata; + return getUnPackInverseSrcPerm(unpackOp, metadata); +} + +SmallVector getUnPackInverseSrcPerm(UnPackOp unpackOp, + PackingMetadata &metadata) { + int64_t unpackRank = unpackOp.getSourceType().getRank(); + ArrayRef innerDimPos = unpackOp.getInnerDimsPos(); + ArrayRef outerPerm = unpackOp.getOuterDimsPerm(); + SmallVector unpackInvSrcPerm = + computePackUnPackPerm(unpackRank, innerDimPos, outerPerm, metadata); + return unpackInvSrcPerm; +} + bool allIndexingsAreProjectedPermutation(LinalgOp op) { return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) { return m.isProjectedPermutation(/*allowZeroInResults=*/true); diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index d8d52a80a9645..b29d35f446694 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -10,7 +10,9 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" @@ -4490,8 +4492,8 @@ LogicalResult 
PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
 template 
 static bool isLikePadUnPad(PackOrUnpackOp packOp,
                            RankedTensorType packedTensorType) {
-  static_assert(std::is_same::value ||
-                    std::is_same::value,
+  static_assert(std::is_same::value ||
+                    std::is_same::value,
                 "Function meant for pack/unpack");
   // This is a pad if packing only adds ones and we don't transpose dimensions.

@@ -4692,7 +4694,7 @@ static bool inferStaticShape(UnPackOp op, SmallVectorImpl &srcShape,
 LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
                                      PatternRewriter &rewriter) {
   /// unpack(pack(x)) -> x
-  if (PackOp packOp = unPackOp.getSource().getDefiningOp()) {
+  if (PackOp packOp = unPackOp.getSource().getDefiningOp()) {
     if (packOp.getSourceType() != unPackOp.getDestType())
       return failure();
     if (packOp.getPaddingValue() ||
@@ -4728,7 +4730,7 @@ LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
       dest =
           rewriter.create(loc, newDestType, unPackOp.getDest());
     }
-    Value newOp = rewriter.create(
+    Value newOp = rewriter.create(
         loc, source, dest, unPackOp.getInnerDimsPos(), unPackOp.getMixedTiles(),
         unPackOp.getOuterDimsPerm());
     rewriter.replaceOpWithNewOp(
@@ -4831,7 +4833,7 @@ getNewMixedTileSizes(PatternRewriter &rewriter, Type newPackedTy,
   return newMixedTileSizes;
 }

-/// Folds a tensor.cast op into a consuming tensor::PackOp op if the
+/// Folds a tensor.cast op into a consuming PackOp if the
 /// `tensor.cast` has source that is more static than the consuming op.
 ///
 /// Example:
@@ -4883,7 +4885,7 @@ struct FoldTensorCastPackOp : public OpRewritePattern {
   }
 };

-/// Folds a tensor.cast op into a consuming tensor::UnPackOp op if the
+/// Folds a tensor.cast op into a consuming UnPackOp if the
 /// `tensor.cast` has source that is more static than the consuming op.
 ///
 /// Example:
@@ -4960,9 +4962,11 @@ struct FoldTensorCastProducerOp
   LogicalResult matchAndRewrite(DestinationStyleOpInterface op,
                                 PatternRewriter &rewriter) const override {

-    // Reject tensor::PackOp - there's dedicated pattern for that instead.
+    // Reject PackOp/UnPackOp (i.e. RelayoutOps) - there are dedicated patterns
+    // for those instead.
if (!foldTensorCastPrecondition(op) || - isa(*op)) + isa(*op) || + isa(*op)) return failure(); SmallVector newResultTypes(op->getResultTypes()); diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp index 052dee402b79e..bd1a09be6b9bc 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @@ -118,7 +118,7 @@ static void applyPermToRange(SmallVector &offsets, } struct PackOpTiling - : public TilingInterface::ExternalModel { + : public TilingInterface::ExternalModel { SmallVector getLoopIteratorTypes(Operation *op) const { // Note that here we only consider untiled dimensions and outer tiled data @@ -491,7 +491,7 @@ static UnpackTileDimInfo getUnpackTileDimInfo(OpBuilder &b, UnPackOp unpackOp, } struct UnPackOpTiling - : public TilingInterface::ExternalModel { + : public TilingInterface::ExternalModel { SmallVector getLoopIteratorTypes(Operation *op) const { auto unpackOp = cast(op); diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp index 99199252710f9..f3560d08ff769 100644 --- a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp +++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp @@ -100,11 +100,6 @@ void transform::ApplyFoldTensorEmptyPatternsOp::populatePatterns( tensor::populateFoldTensorEmptyPatterns(patterns, getFoldSingleUseOnly()); } -void transform::ApplyFoldIntoPackAndUnpackPatternsOp::populatePatterns( - RewritePatternSet &patterns) { - tensor::populateFoldIntoPackAndUnpackPatterns(patterns); -} - void transform::ApplyFoldTensorSubsetOpsPatternsOp::populatePatterns( RewritePatternSet &patterns) { tensor::populateFoldTensorSubsetOpPatterns(patterns); diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt index cc6275fee671a..7880d1c5a0c5d 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt @@ -6,7 +6,6 @@ add_mlir_dialect_library(MLIRTensorTransforms FoldTensorSubsetOps.cpp IndependenceTransforms.cpp MergeConsecutiveInsertExtractSlicePatterns.cpp - PackAndUnpackPatterns.cpp ReshapePatterns.cpp RewriteAsConstant.cpp SwapExtractSliceWithProducerPatterns.cpp diff --git a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp index 60b0c3e759b6c..fa748cf01977f 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/EmptyOpPatterns.cpp @@ -93,49 +93,6 @@ struct FoldEmptyTensorWithExtractSliceOp bool foldSingleUseOnly = false; }; -/// tensor.empty does not define any tensor contents, so an unpadded pack -/// can be folded away. -struct FoldEmptyTensorWithPackOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PackOp packOp, - PatternRewriter &rewriter) const override { - // Check for tensor.empty source. - auto emptyOp = packOp.getSource().getDefiningOp(); - if (!emptyOp) - return failure(); - - // Check for padding. - // Packing with padding cannot be simply removed. - if (packOp.getPaddingValue()) - return rewriter.notifyMatchFailure(packOp, "expects no padding value"); - - // Replace the pack directly with its destination. 
- rewriter.replaceOp(packOp, packOp.getDest()); - - return success(); - } -}; - -/// tensor.empty does not define any tensor contents, so an unpack -/// can be folded away. -struct FoldEmptyTensorWithUnPackOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(UnPackOp unPackOp, - PatternRewriter &rewriter) const override { - // Check for tensor.empty source. - auto emptyOp = unPackOp.getSource().getDefiningOp(); - if (!emptyOp) - return failure(); - - // Replace the unpack directly with its destination. - rewriter.replaceOp(unPackOp, unPackOp.getDest()); - - return success(); - } -}; - // Fold concat operation where all the operands are empty. struct FoldConcatsOfEmpty : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -176,7 +133,6 @@ void mlir::tensor::populateFoldTensorEmptyPatterns(RewritePatternSet &patterns, FoldEmptyTensorWithReshapeOp, FoldEmptyTensorWithReshapeOp>( patterns.getContext(), /*benefit=*/1, foldSingleUseOnly); - patterns.add(patterns.getContext(), - /*benefit=*/1); + patterns.add(patterns.getContext(), + /*benefit=*/1); } diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir index 01ca4374da046..4ba4b09f52163 100644 --- a/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir +++ b/mlir/test/Dialect/Linalg/block-pack-matmul-layout.mlir @@ -38,64 +38,64 @@ func.func @block_matmul_transpose_b( // MMT4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> // MMT4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> // MMT4D-LABEL: func @block_matmul -// MMT4D-COUNT-3: tensor.pack +// MMT4D-COUNT-3: linalg.pack // MMT4D: linalg.generic // MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MMT4D-COUNT-1: tensor.unpack +// MMT4D-COUNT-1: linalg.unpack // MMT4D-LABEL: func @block_matmul_transpose_a -// MMT4D-COUNT-3: tensor.pack +// MMT4D-COUNT-3: linalg.pack // MMT4D: linalg.generic // MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MMT4D-COUNT-1: tensor.unpack +// MMT4D-COUNT-1: linalg.unpack // MMT4D-LABEL: func @block_matmul_transpose_b -// MMT4D-COUNT-3: tensor.pack +// MMT4D-COUNT-3: linalg.pack // MMT4D: linalg.generic // MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MMT4D-COUNT-1: tensor.unpack +// MMT4D-COUNT-1: linalg.unpack // MM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> // MM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)> // MM4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> // MM4D-LABEL: func @block_matmul -// MM4D-COUNT-3: tensor.pack +// MM4D-COUNT-3: linalg.pack // MM4D: linalg.generic // MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MM4D-COUNT-1: tensor.unpack +// MM4D-COUNT-1: linalg.unpack // MM4D-LABEL: func @block_matmul_transpose_a -// MM4D-COUNT-3: tensor.pack +// MM4D-COUNT-3: linalg.pack // MM4D: linalg.generic // MM4D-SAME: indexing_maps = [#[[$MAP]], 
#[[$MAP1]], #[[$MAP2]]] // MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MM4D-COUNT-1: tensor.unpack +// MM4D-COUNT-1: linalg.unpack // MM4D-LABEL: func @block_matmul_transpose_b -// MM4D-COUNT-3: tensor.pack +// MM4D-COUNT-3: linalg.pack // MM4D: linalg.generic // MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MM4D-COUNT-1: tensor.unpack +// MM4D-COUNT-1: linalg.unpack // MTM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d5, d3)> // MTM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)> // MTM4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> // MTM4D-LABEL: func @block_matmul -// MTM4D-COUNT-3: tensor.pack +// MTM4D-COUNT-3: linalg.pack // MTM4D: linalg.generic // MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MTM4D-COUNT-1: tensor.unpack +// MTM4D-COUNT-1: linalg.unpack // MTM4D-LABEL: func @block_matmul_transpose_a -// MTM4D-COUNT-3: tensor.pack +// MTM4D-COUNT-3: linalg.pack // MTM4D: linalg.generic // MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MTM4D-COUNT-1: tensor.unpack +// MTM4D-COUNT-1: linalg.unpack // MTM4D-LABEL: func @block_matmul_transpose_b -// MTM4D-COUNT-3: tensor.pack +// MTM4D-COUNT-3: linalg.pack // MTM4D: linalg.generic // MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] -// MTM4D-COUNT-1: tensor.unpack +// MTM4D-COUNT-1: linalg.unpack diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir index 9e396ba08d246..e667879ceea0e 100644 --- a/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir +++ b/mlir/test/Dialect/Linalg/block-pack-matmul-padding.mlir @@ -21,17 +21,17 @@ func.func @block_matmul_padding( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32> // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<123x125xf32> -> tensor<4x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<125x124xf32> -> tensor<8x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : 
tensor<123x124xf32> -> tensor<4x8x32x16xf32> @@ -39,17 +39,17 @@ func.func @block_matmul_padding( // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<123x124xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<123x124xf32> // NOPAD-LABEL: func @block_matmul_padding( // NOPAD-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32> -// NOPAD-NOT: tensor.pack +// NOPAD-NOT: linalg.pack // NOPAD: linalg.matmul ins(%[[A]], %[[B]] : tensor<123x125xf32>, tensor<125x124xf32>) // NOPAD-SAME: outs(%[[C]] : tensor<123x124xf32>) -> tensor<123x124xf32> -// NOPAD-NOT: tensor.unpack +// NOPAD-NOT: linalg.unpack // PAD-MULT-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> // PAD-MULT-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)> @@ -58,17 +58,17 @@ func.func @block_matmul_padding( // PAD-MULT-SAME: %[[A:[0-9a-z]+]]: tensor<123x125xf32>, %[[B:[0-9a-z]+]]: tensor<125x124xf32>, %[[C:[0-9a-z]+]]: tensor<123x124xf32> // PAD-MULT-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 : f32 // PAD-MULT: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<1x1x256x384xf32> -// PAD-MULT: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// PAD-MULT: %[[A_PACKED:.+]] = linalg.pack %[[A]] // PAD-MULT-SAME: padding_value(%[[ZERO]] : f32) // PAD-MULT-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [256, 384] // PAD-MULT-SAME: into %[[PACK_DST_0]] : tensor<123x125xf32> -> tensor<1x1x256x384xf32> // PAD-MULT: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<1x1x512x384xf32> -// PAD-MULT: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// PAD-MULT: %[[B_PACKED:.+]] = linalg.pack %[[B]] // PAD-MULT-SAME: padding_value(%[[ZERO]] : f32) // PAD-MULT-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [512, 384] // PAD-MULT-SAME: into %[[PACK_DST_1]] : tensor<125x124xf32> -> tensor<1x1x512x384xf32> // PAD-MULT: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<1x1x256x512xf32> -// PAD-MULT: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// PAD-MULT: %[[C_PACKED:.+]] = linalg.pack %[[C]] // PAD-MULT-SAME: padding_value(%[[ZERO]] : f32) // PAD-MULT-SAME: inner_dims_pos = [0, 1] inner_tiles = [256, 512] // PAD-MULT-SAME: into %[[PACK_DST_2]] : tensor<123x124xf32> -> tensor<1x1x256x512xf32> @@ -76,7 +76,7 @@ func.func @block_matmul_padding( // PAD-MULT-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // PAD-MULT-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // PAD-MULT-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<1x1x256x384xf32>, tensor<1x1x512x384xf32>) outs(%[[C_PACKED]] : tensor<1x1x256x512xf32>) -// PAD-MULT: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// PAD-MULT: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // PAD-MULT-SAME: inner_dims_pos = [0, 1] inner_tiles = [256, 512] // PAD-MULT-SAME: into %[[C]] : tensor<1x1x256x512xf32> -> tensor<123x124xf32> // PAD-MULT: return %[[RES_UNPACKED]] : tensor<123x124xf32> 
diff --git a/mlir/test/Dialect/Linalg/block-pack-matmul.mlir b/mlir/test/Dialect/Linalg/block-pack-matmul.mlir index 8a82608177692..aa860dbd581a9 100644 --- a/mlir/test/Dialect/Linalg/block-pack-matmul.mlir +++ b/mlir/test/Dialect/Linalg/block-pack-matmul.mlir @@ -14,22 +14,22 @@ func.func @block_matmul( // CHECK-LABEL: func @block_matmul( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x128xf32>, %[[B:[0-9a-z]+]]: tensor<128x128xf32>, %[[C:[0-9a-z]+]]: tensor<128x128xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x128xf32> -> tensor<4x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x128xf32> -> tensor<8x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<128x128xf32> -> tensor<4x8x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -60,7 +60,7 @@ func.func @block_matmul_dynamic( // CHECK-DAG: %[[A_OUTER_TILE_M:.+]] = affine.apply #[[$MAP_M]]()[%[[A_M]]] // CHECK-DAG: %[[A_OUTER_TILE_K:.+]] = affine.apply #[[$MAP_K]]()[%[[A_K]]] // CHECK: %[[PACK_DST_0:.+]] = tensor.empty(%[[A_OUTER_TILE_M]], %[[A_OUTER_TILE_K]]) : tensor -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor -> tensor @@ -69,7 +69,7 @@ func.func @block_matmul_dynamic( // CHECK-DAG: %[[B_OUTER_TILE_K:.+]] = affine.apply #[[$MAP_K]]()[%[[B_K]]] // CHECK-DAG: %[[B_OUTER_TILE_N:.+]] = affine.apply #[[$MAP_N]]()[%[[B_N]]] // CHECK: %[[PACK_DST_1:.+]] = tensor.empty(%[[B_OUTER_TILE_N]], %[[B_OUTER_TILE_K]]) : tensor -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor -> tensor @@ -78,7 +78,7 @@ func.func @block_matmul_dynamic( // CHECK-DAG: %[[C_OUTER_TILE_M:.+]] = affine.apply #[[$MAP_M]]()[%[[C_M]]] // CHECK-DAG: %[[C_OUTER_TILE_N:.+]] = affine.apply #[[$MAP_N]]()[%[[C_N]]] // CHECK: 
%[[PACK_DST_2:.+]] = tensor.empty(%[[C_OUTER_TILE_M]], %[[C_OUTER_TILE_N]]) : tensor -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: padding_value(%[[ZERO]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor -> tensor @@ -86,7 +86,7 @@ func.func @block_matmul_dynamic( // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor, tensor) outs(%[[C_PACKED]] : tensor) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor -> tensor // CHECK: return %[[RES_UNPACKED]] : tensor @@ -107,7 +107,7 @@ func.func @block_matmul_with_constant( // CHECK-DAG: %[[RES_DST:.+]] = arith.constant dense<0.000000e+00> : tensor<128x128xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: ins({{.*}} : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[CST_ACC_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[RES_DST]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -130,7 +130,7 @@ func.func @block_matmul_with_producer( // CHECK: %[[ACC_PACKED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[FILL_DST_PACKED]] : tensor<4x8x32x16xf32>) -> tensor<4x8x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: ins({{.*}} : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[ACC_PACKED]] : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -152,7 +152,7 @@ func.func @block_matmul_with_consumer( // CHECK-DAG: %[[RES_DST:.+]] = tensor.empty() : tensor<128x128xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: outs({{.*}} : tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: %[[ADD_RES:.+]] = linalg.add @@ -175,22 +175,22 @@ func.func @block_batch_matmul( // CHECK-LABEL: func @block_batch_matmul( // CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// 
CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32> @@ -211,22 +211,22 @@ func.func @block_matmul_transpose_a( // CHECK-LABEL: func @block_matmul_transpose_a( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -247,22 +247,22 @@ func.func @block_batch_matmul_transpose_a( // CHECK-LABEL: func @block_batch_matmul_transpose_a( // CHECK-SAME: %[[A:.+]]: tensor<512x128x64xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// 
CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x128x64xf32> -> tensor<512x2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32> @@ -283,22 +283,22 @@ func.func @block_matmul_transpose_b( // CHECK-LABEL: func @block_matmul_transpose_b( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -319,22 
+319,22 @@ func.func @block_batch_matmul_transpose_b( // CHECK-LABEL: func @block_batch_matmul_transpose_b( // CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x64x128xf32>, %[[C:.+]]: tensor<512x64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x64x128xf32> -> tensor<512x4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32> @@ -365,22 +365,22 @@ func.func @block_generic_matmul( // CHECK-LABEL: func @block_generic_matmul( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x128xf32>, %[[B:[0-9a-z]+]]: tensor<128x128xf32>, %[[C:[0-9a-z]+]]: tensor<128x128xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<4x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x128xf32> -> tensor<4x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<8x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x128xf32> -> tensor<8x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<4x8x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<128x128xf32> -> tensor<4x8x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<4x2x32x64xf32>, tensor<8x2x16x64xf32>) outs(%[[C_PACKED]] : 
tensor<4x8x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<4x8x32x16xf32> -> tensor<128x128xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<128x128xf32> @@ -411,22 +411,22 @@ func.func @block_generic_matmul_transpose_a( // CHECK-LABEL: func @block_generic_matmul_transpose_a( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -457,22 +457,22 @@ func.func @block_generic_matmul_transpose_b( // CHECK-LABEL: func @block_generic_matmul_transpose_b( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32> // CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32> -// CHECK: %[[A_PACKED:.+]] = tensor.pack %[[A]] +// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] // CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32> // CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32> -// CHECK: %[[B_PACKED:.+]] = tensor.pack %[[B]] +// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64] // CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32> // CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32> -// CHECK: %[[C_PACKED:.+]] = tensor.pack %[[C]] +// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32> // CHECK: %[[GEMM_RES_PACKED:.+]] = 
linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>) -// CHECK: %[[RES_UNPACKED:.+]] = tensor.unpack %[[GEMM_RES_PACKED]] +// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32> // CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32> @@ -498,10 +498,10 @@ func.func @non_contraction_generic( // CHECK-LABEL: func @non_contraction_generic( // CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32> // CHECK-DAG: %[[C0:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK-NOT: tensor.pack +// CHECK-NOT: linalg.pack // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: outs(%[[A]] : tensor<64x128xf32>) -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // CHECK: return %[[GENERIC]] : tensor<64x128xf32> diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index cd439cd23ecd0..db4f6181f517c 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -357,7 +357,7 @@ func.func @fill_pack() -> tensor<24x32x16x16xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<24x32x16x16xf32> %1 = linalg.fill ins(%cst : f32) outs(%dest : tensor<384x512xf32>) -> tensor<384x512xf32> - %pack = tensor.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32> + %pack = linalg.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32> return %pack : tensor<24x32x16x16xf32> } // CHECK-LABEL: func.func @fill_pack @@ -374,7 +374,7 @@ func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{ %extracted_slice_15 = tensor.extract_slice %9[0, 0, 0, 0] [1, 1, 16, 64] [1, 1, 1, 1] : tensor<1x1x16x64xi32> to tensor<1x1x16x64xi32> %16 = linalg.fill ins(%c0_i32 : i32) outs(%extracted_slice_15 : tensor<1x1x16x64xi32>) -> tensor<1x1x16x64xi32> %0 = bufferization.to_tensor %alloc restrict writable : memref<1x1x8x4x4x8xi32> to tensor<1x1x8x4x4x8xi32> - %pack_18 = tensor.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32> + %pack_18 = linalg.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32> return %pack_18 : tensor<1x1x8x4x4x8xi32> } @@ -397,7 +397,7 @@ func.func @dynamic_fill_pack(%arg0: tensor<?x?xf32>) -> tensor<?x?x16x16xf32> { %1 = affine.apply #map()[%dim] %2 = affine.apply #map()[%dim_0] %3 = tensor.empty(%1, %2) : tensor<?x?x16x16xf32> - %pack = tensor.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32> + %pack = linalg.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32> return %pack : tensor<?x?x16x16xf32> } // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> @@ -1249,3 +1249,499 @@ func.func @recursive_effect(%arg : tensor<1xf32>) { // CHECK-LABEL: @recursive_effect // CHECK: linalg.map + 
+//===----------------------------------------------------------------------===// +// linalg.pack +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func @fold_pack_constant_splat +// CHECK-NOT: linalg.pack +// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> +func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { + %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32> + %0 = linalg.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] + inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32> + return %0 : tensor<8x16x8x32xf32> +} + +// ----- + +// CHECK-LABEL: func @fold_padding_value_pack_constant_splat +// CHECK-NOT: linalg.pack +// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> +func.func @fold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { + %pad = arith.constant 1.000000e-01 : f32 + %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> + %0 = linalg.pack %cst + padding_value(%pad : f32) + outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] + inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> + return %0 : tensor<8x16x8x32xf32> +} + + +// ----- + +// CHECK-LABEL: func @nofold_padding_value_pack_constant_splat +// CHECK: arith.constant dense<1.000000e-01> : tensor<63x127xf32> +// CHECK: linalg.pack +func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { + %pad = arith.constant 0.0 : f32 + %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> + %0 = linalg.pack %cst + padding_value(%pad : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [0, 1] + inner_tiles = [8, 32] + into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> + return %0 : tensor<8x16x8x32xf32> +} + +// ----- + +func.func @fold_padding_value_pack(%arg0: tensor<1200x500000xf32>) -> tensor<31250x1200x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<31250x1200x16x1xf32> + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [16, 1] + into %0 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> + return %pack : tensor<31250x1200x16x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack +// CHECK-NOT: padding_value + +// ----- + +func.func @infer_src_shape_pack(%src: tensor, %dest: tensor<10x20x30x40x16xf32>) -> tensor<10x20x30x40x16xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %src + padding_value(%cst : f32) + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor -> tensor<10x20x30x40x16xf32> + return %pack : tensor<10x20x30x40x16xf32> +} +// CHECK-LABEL: func.func @infer_src_shape_pack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor to tensor<40x20x?x30xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[CAST_SRC]] {{.+}} into %[[DEST]] +// CHECK: return %[[PACK]] + +// ----- + +func.func @infer_dest_shape_pack(%src: tensor<30x20x?x10xf32>, %dest: tensor) -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %src + padding_value(%cst : f32) + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor<30x20x?x10xf32> -> tensor + return %pack : tensor +} +// CHECK-LABEL: func.func @infer_dest_shape_pack 
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] {{.+}} into %[[CAST_DEST]] +// CHECK: %[[CAST_PACK:.+]] = tensor.cast %[[PACK]] : tensor<?x20x10x30x16xf32> to tensor<?x?x?x?x16xf32> +// CHECK: return %[[CAST_PACK]] + +// ----- + +func.func @no_infer_pack_shape(%arg0: tensor<?x32x100xf32>, %arg1: index) -> tensor<32x7x?x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty(%arg1) : tensor<32x7x?x16x1xf32> + %pack = linalg.pack %arg0 padding_value(%cst : f32) outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<?x32x100xf32> -> tensor<32x7x?x16x1xf32> + return %pack : tensor<32x7x?x16x1xf32> +} +// CHECK-LABEL: func.func @no_infer_pack_shape +// CHECK-NOT: tensor.cast + +// ----- + +func.func @fold_padding_value_pack_negative1(%arg0: tensor<1200x499999xf32>) -> tensor<31250x1200x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<31250x1200x16x1xf32> + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [16, 1] + into %0 : tensor<1200x499999xf32> -> tensor<31250x1200x16x1xf32> + return %pack : tensor<31250x1200x16x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack_negative1 +// CHECK: linalg.pack +// CHECK-SAME: padding_value + +// ----- + +func.func @fold_padding_value_pack_negative2(%arg0: tensor<1200x?xf32>, %arg1: tensor<?x1200x16x1xf32>) -> tensor<?x1200x16x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [16, 1] + into %arg1 : tensor<1200x?xf32> -> tensor<?x1200x16x1xf32> + return %pack : tensor<?x1200x16x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack_negative2 +// CHECK: linalg.pack +// CHECK-SAME: padding_value + +// ----- + +func.func @fold_padding_value_pack_negative3(%arg0: tensor<1200x500000xf32>, %arg1: tensor<?x1200x?x1xf32>, %tile : index) -> tensor<?x1200x?x1xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %arg0 + padding_value(%cst : f32) + outer_dims_perm = [1, 0] + inner_dims_pos = [1, 0] + inner_tiles = [%tile, 1] + into %arg1 : tensor<1200x500000xf32> -> tensor<?x1200x?x1xf32> + return %pack : tensor<?x1200x?x1xf32> +} +// CHECK-LABEL: func @fold_padding_value_pack_negative3 +// CHECK: linalg.pack +// CHECK-SAME: padding_value + +// ----- + +//===----------------------------------------------------------------------===// +// linalg.unpack +//===----------------------------------------------------------------------===// + + +// CHECK-LABEL: func @fold_unpack_constant_splat +// CHECK-NOT: linalg.unpack +// CHECK: arith.constant dense<1.000000e-01> : tensor<128x256xf32> +func.func @fold_unpack_constant_splat(%dest : tensor<128x256xf32>) -> tensor<128x256xf32> { + %cst = arith.constant dense<1.000000e-01> : tensor<16x8x8x32xf32> + %0 = linalg.unpack %cst inner_dims_pos = [0, 1] + inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32> + return %0 : tensor<128x256xf32> +} + +// ----- + +func.func @infer_dest_shape_unpack(%src: tensor<10x20x30x40x16xf32>, %dest: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> { + %unpack = linalg.unpack %src + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor<10x20x30x40x16xf32> -> tensor<?x?x?x?xf32> + return %unpack : tensor<?x?x?x?xf32> +} +// CHECK-LABEL: func.func @infer_dest_shape_unpack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor<?x?x?x?xf32> to 
tensor<40x20x?x30xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SRC]] {{.+}} into %[[CAST_DEST]] +// CHECK: %[[CAST_UNPACK:.+]] = tensor.cast %[[UNPACK]] : tensor<40x20x?x30xf32> to tensor<?x?x?x?xf32> +// CHECK: return %[[CAST_UNPACK]] + +// ----- + +func.func @infer_src_shape_unpack(%src: tensor<?x?x?x?x16xf32>, %dest: tensor<30x20x?x10xf32>) -> tensor<30x20x?x10xf32> { + %unpack = linalg.unpack %src + outer_dims_perm = [2, 1, 3, 0] + inner_dims_pos = [2] + inner_tiles = [16] + into %dest : tensor<?x?x?x?x16xf32> -> tensor<30x20x?x10xf32> + return %unpack : tensor<30x20x?x10xf32> +} +// CHECK-LABEL: func.func @infer_src_shape_unpack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] +// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor<?x?x?x?x16xf32> to tensor<?x20x10x30x16xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[CAST_SRC]] +// CHECK: return %[[UNPACK]] + +// ----- + +func.func @no_infer_unpack_shape(%arg1: tensor<32x7x?x16x1xf32>, %arg2: index) -> tensor<?x32x100xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty(%arg2) : tensor<?x32x100xf32> + %unpack = linalg.unpack %arg1 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<32x7x?x16x1xf32> -> tensor<?x32x100xf32> + return %unpack : tensor<?x32x100xf32> +} +// CHECK-LABEL: func.func @no_infer_unpack_shape +// CHECK-NOT: tensor.cast + +// ----- + +//===----------------------------------------------------------------------===// +// linalg.pack + linalg.unpack +//===----------------------------------------------------------------------===// + +// Chain: NC -> NCnc -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) +// CHECK: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> + return %unpacked : tensor<128x128xf32> +} + +// ----- + +// Chain: NC -> NCcn -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) +// CHECK-NOT: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %t inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> + return %unpacked : tensor<128x128xf32> +} + +// ----- + +// Chain: NC -> CNcn -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) +// CHECK-NOT: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %t outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
+ return %unpacked : tensor<128x128xf32> +} + +// ----- + +// Chain: NC -> NCnc -> NCnc -> NC +// CHECK: func.func @unpack_pack( +// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>, +// CHECK: return %[[T]] : tensor<128x128xf32> +func.func @unpack_pack(%t: tensor<128x128xf32>, %tile1: index, %tile2: index) -> tensor<128x128xf32> { + %tensor_empty = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + %tensor_empty1 = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + return %unpacked : tensor<128x128xf32> +} + +// ----- + +// CHECK: func.func @unpack_pack_with_padding_no_canonicalization( +// CHECK: linalg.pack +// CHECK: linalg.unpack +func.func @unpack_pack_with_padding_no_canonicalization(%t: tensor<256x512xbf16>) -> tensor<224x512xbf16> { + %tensor_empty = tensor.empty() : tensor<4x16x64x32xbf16> + %tensor_empty1 = tensor.empty() : tensor<224x512xbf16> + %packed = linalg.pack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty : tensor<256x512xbf16> -> tensor<4x16x64x32xbf16> + %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16> + return %unpacked : tensor<224x512xbf16> +} + +// ----- + +// Chain NCnc -> NC -> NC -> NCnc +// CHECK: func.func @pack_unpack( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, +// CHECK: return %[[T]] : tensor<16x16x?x?xf32> +func.func @pack_unpack(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + return %packed : tensor<16x16x?x?xf32> +} + +// ----- + +// Chain NCnc -> NC -> NC -> NCnc +// CHECK: func.func @pack_unpack( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x8x8xf32> +// CHECK: return %[[T]] : tensor<16x16x8x8xf32> +func.func @pack_unpack(%t: tensor<16x16x8x8xf32>) -> tensor<16x16x8x8xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<16x16x8x8xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty() : tensor<16x16x8x8xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x8x8xf32> + return %packed : tensor<16x16x8x8xf32> +} + +// ----- + +// CHECK: func.func @pack_unpack_same_tiles( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>, +// CHECK: return %[[T]] : tensor<?x?x?x?xf32> +func.func @pack_unpack_same_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, + %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> { + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32> + %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, 
%dim6) : tensor<?x?x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32> + return %packed : tensor<?x?x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_unpack_different_tiles( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>, +// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32> +func.func @pack_unpack_different_tiles(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, + %tile1: index, %tile2: index) -> tensor<?x?x?x?xf32> { + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32> + %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile2, %tile1] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32> + return %packed : tensor<?x?x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_unpack_dynamic_with_padding( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x?x?xf32>, +// CHECK-NOT: return %[[T]] : tensor<?x?x?x?xf32> +func.func @pack_unpack_dynamic_with_padding(%t: tensor<?x?x?x?xf32>, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, + %tile1: index, %tile2: index, %pad: f32) -> tensor<?x?x?x?xf32> { + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<?x?x?x?xf32> -> tensor<?x?xf32> + %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor<?x?x?x?xf32> + %packed = linalg.pack %unpacked padding_value(%pad: f32) inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<?x?xf32> -> tensor<?x?x?x?xf32> + return %packed : tensor<?x?x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_outer_dims_unpack_no_outer_dims( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, +// CHECK: return %[[T]] : tensor<16x16x?x?xf32> +func.func @pack_outer_dims_unpack_no_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %unpacked outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + return %packed : tensor<16x16x?x?xf32> +} + +// ----- + +// CHECK: func.func @pack_no_outer_dims_unpack_outer_dims( +// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, +// CHECK: return %[[T]] : tensor<16x16x?x?xf32> +func.func @pack_no_outer_dims_unpack_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { + %tensor_empty = tensor.empty() : tensor<128x128xf32> + %unpacked = linalg.unpack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> + %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> + %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> + return %packed : tensor<16x16x?x?xf32> +} + +// ----- + +//===----------------------------------------------------------------------===// +// tensor.cast + linalg.pack +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func.func @fold_cast_pack_dynamic_tile_size 
+// CHECK-SAME: %[[DEST:.*]]: tensor<1x1x8x1xi32>, +// CHECK-SAME: %[[SRC:.*]]: tensor<7x?xi32>, +// CHECK-SAME: %[[PAD:.*]]: i32) -> tensor<1x1x8x1xi32> { +// CHECK: %[[PACK:.*]] = linalg.pack %[[SRC]] padding_value(%[[PAD]] : i32) +// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] +// CHECK-SAME: test_attr +// CHECK-SAME: : tensor<7x?xi32> -> tensor<1x1x8x1xi32> +// CHECK: return %[[PACK]] : tensor<1x1x8x1xi32> +func.func @fold_cast_pack_dynamic_tile_size( + %dest: tensor<1x1x8x1xi32>, + %src: tensor<7x?xi32>, + %pad: i32) -> tensor<1x1x8x1xi32> { + + %cast = tensor.cast %dest : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> + %c8 = arith.constant 8 : index + %pack = linalg.pack %src padding_value(%pad : i32) + inner_dims_pos = [0, 1] + inner_tiles = [%c8, 1] + into %cast {test_attr} : tensor<7x?xi32> -> tensor<1x1x?x1xi32> + %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32> + return %res : tensor<1x1x8x1xi32> +} + +// ----- + +func.func @infer_and_fold_pack_unpack_same_tiles(%t: tensor<10x20x4x4xf32>) -> tensor<10x20x4x4xf32> { + %dim1 = arith.constant 40 : index + %dim2 = arith.constant 80 : index + %tensor_empty = tensor.empty(%dim1, %dim2) : tensor<?x?xf32> + %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty : tensor<10x20x4x4xf32> -> tensor<?x?xf32> + %cast = tensor.cast %unpacked : tensor<?x?xf32> to tensor<40x80xf32> + %tensor_empty1 = tensor.empty() : tensor<10x20x4x4xf32> + %packed = linalg.pack %cast inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty1 : tensor<40x80xf32> -> tensor<10x20x4x4xf32> + return %packed : tensor<10x20x4x4xf32> +} +// CHECK-LABEL: func.func @infer_and_fold_pack_unpack_same_tiles +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK: return %[[SRC]] + +// ----- + +// CHECK-LABEL: func.func @pack_dont_drop_attributes( +// CHECK: linalg.pack {{.*}} {test_attr} +func.func @pack_dont_drop_attributes(%arg0: tensor<128x?x100xf16>, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> { + %c32_i64 = arith.constant 32 : i64 + %cst = arith.constant 0.000000e+00 : f16 + %pack = linalg.pack %arg0 padding_value(%cst : f16) outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %arg1 {test_attr} : tensor<128x?x100xf16> -> tensor<128x?x100x16x1xf16> + return %pack : tensor<128x?x100x16x1xf16> +} +// ----- + +//===----------------------------------------------------------------------===// +// linalg.fill + linalg.unpack +//===----------------------------------------------------------------------===// +// Fold DstStyleOp -> linalg.unpack operations. 
+func.func @fold_dst_style_ops_into_unpack(%arg0 : tensor<?x?x16x64xf32>, %init : tensor<?x?xf32>) -> tensor<?x?xf32> { + %cst = arith.constant 0.0 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32> + %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %fill : tensor<?x?x16x64xf32> -> tensor<?x?xf32> + return %unpack : tensor<?x?xf32> +} +// CHECK-LABEL: func @fold_dst_style_ops_into_unpack +// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x16x64xf32> +// CHECK-SAME: %[[INIT:.+]]: tensor<?x?xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] +// CHECK-SAME: into %[[INIT]] +// CHECK: return %[[UNPACK]] + +// ----- + +//===----------------------------------------------------------------------===// +// tensor.cast + linalg.unpack +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func.func @fold_cast_unpack_dynamic_tile_size( +// CHECK-SAME: %[[SRC:.*]]: tensor<1x1x8x1xi32>, +// CHECK-SAME: %[[DEST:.*]]: tensor<7x?xi32>) -> tensor<7x?xi32> { +// CHECK: %[[RES:.*]] = linalg.unpack %[[SRC]] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] {test_attr} : tensor<1x1x8x1xi32> -> tensor<7x?xi32> +// CHECK: return %[[RES]] : tensor<7x?xi32> +func.func @fold_cast_unpack_dynamic_tile_size( + %src: tensor<1x1x8x1xi32>, + %res: tensor<7x?xi32>) -> tensor<7x?xi32> { + + %cast = tensor.cast %src : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> + %c8 = arith.constant 8 : index + %unpack = linalg.unpack %cast + inner_dims_pos = [0, 1] + inner_tiles = [%c8, 1] + into %res {test_attr} : tensor<1x1x?x1xi32> -> tensor<7x?xi32> + return %unpack : tensor<7x?xi32> +} diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index cb8064411bbae..a48807b46b766 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -15,7 +15,7 @@ func.func @dynamic_elem_pack(%arg0: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>) %4 = arith.addf %arg3, %arg3 : f32 linalg.yield %4 : f32 } -> tensor<?x?xf32> - %4 = tensor.pack %3 + %4 = linalg.pack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32> @@ -34,7 +34,7 @@ func.func @dynamic_elem_pack(%arg0: tensor<?x?xf32>, %dest: tensor<?x?x8x2xf32>) // CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] // CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]], %[[OUTER_D1]]) : tensor<?x?x8x2xf32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 2] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -56,7 +56,7 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 } -> tensor<128x256xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> @@ -67,7 +67,7 @@ func.func @elem_pack_transpose_inner_dims(%arg0: tensor<128x256xi32>, %dest: ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -89,7 +89,7 @@ func.func 
@elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 } -> tensor<128x256xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] @@ -101,7 +101,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %dest: ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[ARG0_EMPTY]] : tensor<128x256xi32> -> tensor<16x4x32x16xi32> // CHECK: %[[ELEM:.+]] = linalg.generic @@ -123,7 +123,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>, %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 } -> tensor<128x256xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] @@ -135,7 +135,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims(%arg0: tensor<128x256xi32>, // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -163,7 +163,7 @@ func.func @dynamic_broadcast_pack(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %d %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 } -> tensor<?x?xf32> - %4 = tensor.pack %3 + %4 = linalg.pack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x8x2xf32> @@ -182,13 +182,13 @@ func.func @dynamic_broadcast_pack(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %d // CHECK-DAG: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK-DAG: %[[OUTER_D0:.+]] = affine.apply #[[$MAP0]]()[%[[D0]]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty(%[[OUTER_D0]]) : tensor<?x8xf32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG1]], %[[C0]] // CHECK-DAG: %[[OUTER_D1:.+]] = affine.apply #[[$MAP1]]()[%[[D1]]] // CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty(%[[OUTER_D1]]) : tensor<?x2xf32> -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[PACK_ARG1:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] // CHECK-SAME: into %[[ARG1_EMPTY]] // CHECK: %[[ELEM:.+]] = linalg.generic @@ -212,7 +212,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: tensor<64xf32>, %des ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<1x56x57x64xf32> - %2 = tensor.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32> + %2 = linalg.pack %1 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %dest : tensor<1x56x57x64xf32> -> tensor<1x2x56x57x32xf32> return %2 : tensor<1x2x56x57x32xf32> } // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)> @@ -221,7 +221,7 @@ func.func @elem_pack_transpose_inner_and_outer_dims2(%arg0: 
tensor<64xf32>, %des // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<2x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -247,7 +247,7 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x %1 = arith.addi %0, %b2 : i32 linalg.yield %1 : i32 } -> tensor<100x200x128x256xi32> - %4 = tensor.pack %transpose + %4 = linalg.pack %transpose inner_dims_pos = [3, 2] inner_tiles = [16, 32] into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> @@ -263,11 +263,11 @@ func.func @transpose_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: tensor<100x // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -293,7 +293,7 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: %1 = arith.addi %0, %b2 : i32 linalg.yield %1 : i32 } -> tensor<100x200x128x256xi32> - %4 = tensor.pack %transpose + %4 = linalg.pack %transpose inner_dims_pos = [3, 2] inner_tiles = [16, 32] into %dest : tensor<100x200x128x256xi32> -> tensor<100x200x4x16x16x32xi32> @@ -309,11 +309,11 @@ func.func @affine_constant_expr_pack(%arg0: tensor<100x128x200x256xi32>, %arg1: // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<100x4x200x16x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<1x4x1x1x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -339,7 +339,7 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a %1 = arith.addi %0, %b2 : i32 linalg.yield %1 : i32 } -> tensor<100x200x128x256xi32> - %4 = tensor.pack %transpose + %4 = linalg.pack %transpose outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] inner_tiles = [16, 32] @@ -356,11 +356,11 @@ func.func @transpose_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<200x4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : 
tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -380,7 +380,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten linalg.yield %4 : i32 } -> tensor<128x256xi32> %empty = tensor.empty() : tensor<16x4x32x16xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] @@ -393,11 +393,11 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[PACKED_ARG1:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[ARG1_EMPTY]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic @@ -411,7 +411,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%1 : tensor<12x56x56x64xf32>) { ^bb0(%out: f32): %3 = arith.addf %out, %out : f32 @@ -424,17 +424,17 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56 // CHECK-LABEL: func.func @unpack_on_output // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] // CHECK: %[[ARG0_EMPTY_PACK:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY_PACK]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]]] // CHECK-SAME: outs(%[[PACKED_ARG0]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[UNPACKED_ARG0]] @@ -444,7 +444,7 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56 func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf32>) -> 
tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %out : f32 @@ -458,22 +458,22 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1_PACK_EMPTY]] // CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[ARG0_PACK:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_PACK_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] // CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1]] @@ -483,7 +483,7 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56 func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf16>) -> tensor<12x56x56x64xf16> { %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf16>) { ^bb0(%in: f32, %out: f16): %3 = arith.truncf %in : f32 to f16 @@ -497,22 +497,22 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // CHECK: 
%[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> -// CHECK: %[[ARG1_PACK:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1_PACK_EMPTY]] // CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG0_PACK:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[ARG0_PACK:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_PACK_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] // CHECK-SAME: outs(%[[ARG1_PACK]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1]] @@ -523,7 +523,7 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> { %init = tensor.empty() : tensor<12x56x56x64xf32> %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %in : f32 @@ -537,19 +537,19 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[FINAL_RES:.+]] = tensor.empty() : tensor<12x56x56x64xf32> // CHECK: %[[ARG0_UNPACK_EMPTY:.+]] = tensor.empty() : tensor<12x56x56x64xf32> -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> // CHECK: %[[ARG0_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_PACK_EMPTY]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] // CHECK-SAME: outs(%[[DEST]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[FINAL_RES]] @@ -558,7 +558,7 @@ func.func @forward_tensor_empty(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x5 func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] 
inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> %padded = tensor.pad %1 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 @@ -571,7 +571,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[PADDED]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x2x58x58x32xf32> -> tensor<1x58x58x64xf32> @@ -580,7 +580,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<2x58x58x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> %padded = tensor.pad %1 low[1, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 @@ -593,7 +593,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[1, 0, 1, 1, 0] high[0, 0, 1, 1, 0] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x58x58x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[PADDED]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[PADDED]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<2x2x58x58x32xf32> -> tensor<2x58x58x64xf32> @@ -602,7 +602,7 @@ func.func @pad_valid_unpack_propagation(%arg0: tensor<1x2x56x56x32xf32>) -> tens func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x58x58x66xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<1x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> %padded = tensor.pad %1 low[0, 1, 1, 1] high[0, 1, 1, 1] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 @@ -614,7 +614,7 @@ func.func @pad_along_unpacked_dim(%arg0: tensor<1x2x56x56x32xf32>) -> tensor<1x5 // CHECK: %[[ARG0:.+]]: tensor<1x2x56x56x32xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // 
CHECK-SAME: into %[[EMPTY]] : tensor<1x2x56x56x32xf32> -> tensor<1x56x56x64xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[UNPACK]] low[0, 1, 1, 1] high[0, 1, 1, 1] @@ -628,7 +628,7 @@ func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1 tensor.yield %cst : f32 } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x2x58x58x32xf32> - %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> return %1 : tensor<1x2x58x58x32xf32> } @@ -636,7 +636,7 @@ func.func @pad_valid_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> tensor<1 // CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32> -// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] // CHECK: return %[[PADDED]] @@ -650,7 +650,7 @@ func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>) tensor.yield %cst : f32 } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x58x58x2x32xf32> - %1 = tensor.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32> + %1 = linalg.pack %padded outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x58x58x2x32xf32> return %1 : tensor<1x58x58x2x32xf32> } @@ -658,7 +658,7 @@ func.func @pad_valid_outer_dims_pack_propagation(%arg0: tensor<1x64x56x56xf32>) // CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x56x56x2x32xf32> -// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x56x56x2x32xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 1, 1, 0, 0] high[0, 1, 1, 0, 0] @@ -673,7 +673,7 @@ func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x tensor.yield %cst : f32 } : tensor<1x60x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x2x58x58x32xf32> - %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> return %1 : tensor<1x2x58x58x32xf32> } @@ -682,7 +682,7 @@ func.func @pad_along_packed_dim(%arg0: tensor<1x60x56x56xf32>) -> tensor<1x2x58x // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[PADDED:.+]] = tensor.pad %[[ARG0]] low[0, 2, 1, 1] high[0, 2, 1, 1] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x58x58x32xf32> -// CHECK: tensor.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: linalg.pack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] // 
CHECK-SAME: into %[[EMPTY]] : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> // ----- @@ -694,7 +694,7 @@ func.func @multi_use_pad_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> (ten tensor.yield %cst : f32 } : tensor<1x64x56x56xf32> to tensor<1x64x58x58xf32> %0 = tensor.empty() : tensor<1x2x58x58x32xf32> - %1 = tensor.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> + %1 = linalg.pack %padded inner_dims_pos = [1] inner_tiles = [32] into %0 : tensor<1x64x58x58xf32> -> tensor<1x2x58x58x32xf32> return %padded, %1 : tensor<1x64x58x58xf32>, tensor<1x2x58x58x32xf32> } @@ -702,10 +702,10 @@ func.func @multi_use_pad_pack_propagation(%arg0: tensor<1x64x56x56xf32>) -> (ten // CHECK-SAME: %[[ARG0:.+]]: tensor<1x64x56x56xf32>) // CHECK: %[[CST:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x2x56x56x32xf32> -// CHECK: %[[PACKED:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: %[[PACKED:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [32] // CHECK-SAME: into %[[EMPTY]] : tensor<1x64x56x56xf32> -> tensor<1x2x56x56x32xf32> // CHECK: %[[PADDED:.+]] = tensor.pad %[[PACKED]] low[0, 0, 1, 1, 0] high[0, 0, 1, 1, 0] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] +// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[PADDED]] inner_dims_pos = [1] inner_tiles = [32] // CHECK: return %[[UNPACKED]], %[[PADDED]] // ----- @@ -721,7 +721,7 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3 linalg.yield %4 : i32 } -> tensor<128x256xi32> %dest = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> @@ -735,7 +735,7 @@ func.func @would_break_dominance(%arg0: tensor<128x256xi32>) -> tensor<4x16x16x3 // CHECK-SAME: ins(%[[ARG0]] // CHECK-SAME: outs(%[[EMPTY]] // CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> -// CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] +// CHECK-NEXT: %{{.+}} = linalg.pack %[[GEN]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ALLOC]] @@ -751,7 +751,7 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { linalg.yield %in : f32 } -> tensor<1x7x7x1024xf32> %empty_pack = tensor.empty() : tensor<1x32x7x7x32xf32> - %pack = tensor.pack %gen outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %empty_pack : tensor<1x7x7x1024xf32> -> tensor<1x32x7x7x32xf32> + %pack = linalg.pack %gen outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %empty_pack : tensor<1x7x7x1024xf32> -> tensor<1x32x7x7x32xf32> return %pack : tensor<1x32x7x7x32xf32> } @@ -772,7 +772,7 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x56x56x64xf32> { %init = tensor.empty() : tensor<12x56x56x64xf32> %0 = tensor.empty() : tensor<12x56x56x64xf32> - %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] into %0 : tensor<12x64x56x56xf32> -> tensor<12x56x56x64xf32> + %1 = linalg.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] into %0 : tensor<12x64x56x56xf32> -> tensor<12x56x56x64xf32> %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = 
["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %in : f32 @@ -782,13 +782,13 @@ func.func @unpack_empty_inner_dims(%arg0: tensor<12x64x56x56xf32>) -> tensor<12x } // CHECK-LABEL: func.func @unpack_empty_inner_dims -// CHECK: %[[UNPACKED_ARG0:.+]] = tensor.unpack +// CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[UNPACKED_ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[UNPACKED_ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] +// CHECK: %[[UNPACKED:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [] inner_tiles = [] // ----- @@ -805,7 +805,7 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>, linalg.yield %4 : i32 } -> tensor<128x256xi32> %dest = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %elem + %pack = linalg.pack %elem inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dest : tensor<128x256xi32> -> tensor<4x16x16x32xi32> @@ -817,11 +817,11 @@ func.func @reduction_pack_transpose_inner_dims(%arg0: tensor<128x256x32xi32>, // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK: %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<4x16x16x32xi32> -// CHECK: %[[PACK_ARG1:.+]] = tensor.pack %[[ARG1]] +// CHECK: %[[PACK_ARG1:.+]] = linalg.pack %[[ARG1]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG1_EMPTY]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x32x16x32xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[RED:.+]] = linalg.generic @@ -851,7 +851,7 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a linalg.yield %2 : i32 } -> tensor<100x128x256xi32> %init_pack = tensor.empty() : tensor<4x16x100x16x32xi32> - %4 = tensor.pack %reduction + %4 = linalg.pack %reduction outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] @@ -869,15 +869,15 @@ func.func @reduction_pack_with_outer_dims(%arg0: tensor<100x128x200x256xi32>, %a // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] // CHECK: %[[ARG3_EMPTY:.+]] = tensor.empty() : tensor<4x16x100x16x32xi32> -// CHECK: %[[PACKED_ARG3:.+]] = tensor.pack %[[ARG3]] +// CHECK: %[[PACKED_ARG3:.+]] = linalg.pack %[[ARG3]] // CHECK-SAME: outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG3_EMPTY]] // CHECK: %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<4x16x200x100x16x32xi32> -// CHECK: %[[PACKED_ARG0:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 3, 2, 0] inner_dims_pos = [3, 1] inner_tiles = [16, 32] // CHECK-SAME: into %[[ARG0_EMPTY]] // CHECK: %[[ARG2_EMPTY:.+]] = tensor.empty() : tensor<4x32xi32> -// CHECK: %[[PACKED_ARG2:.+]] = tensor.pack %[[ARG2]] +// CHECK: %[[PACKED_ARG2:.+]] = linalg.pack %[[ARG2]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [32] // CHECK-SAME: into %[[ARG2_EMPTY]] // CHECK: 
%[[RES:.+]] = linalg.generic @@ -894,7 +894,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 %filter: tensor<2x2xi32>) -> tensor<16x540x960xi32>{ %init = tensor.empty() : tensor<16x540x960xi32> %empty = tensor.empty() : tensor<1x16x1080x1920xi32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [16] into %empty : tensor<1x1x1080x1920x16xi32> -> tensor<1x16x1080x1920xi32> @@ -916,7 +916,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 // CHECK: %[[FINAL_RES:.+]] = tensor.empty() : tensor<16x540x960xi32> // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x540x960x16xi32> // CHECK: %[[PACK_EMPTY:.+]] = tensor.empty() : tensor<1x1x1080x1920x16xi32> -// CHECK: %[[PACK_ARG0:.+]] = tensor.pack +// CHECK: %[[PACK_ARG0:.+]] = linalg.pack // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [16] // CHECK-SAME: into %[[PACK_EMPTY]] // CHECK: %[[POOL:.+]] = linalg.generic @@ -924,7 +924,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]], %[[ARG1]] // CHECK-SAME: outs(%[[INIT]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[POOL]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[POOL]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] // CHECK-SAME: into %[[FINAL_RES]] // CHECK: return %[[UNPACK]] : tensor<16x540x960xi32> @@ -934,7 +934,7 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) -> tensor { %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor %2 = tensor.empty(%dim) : tensor - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor func.return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_collapse @@ -943,7 +943,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor // CHECK: return %[[COLLAPSED]] : tensor @@ -952,7 +952,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor, %dim : index) -> tensor { %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor %2 = tensor.empty(%dim) : tensor - %pack = tensor.pack %collapsed inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor + %pack = linalg.pack %collapsed inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor func.return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm @@ -961,7 +961,7 @@ func.func 
@bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor // CHECK: return %[[COLLAPSED]] : tensor @@ -970,13 +970,13 @@ func.func @bubble_up_pack_through_collapse_empty_outer_dims_perm(%1: tensor) -> tensor<4x32x3072x8x1xf32> { %collapsed = tensor.collapse_shape %1 [[0], [1, 2], [3]] : tensor<4x192x16x256xf32> into tensor<4x3072x256xf32> %2 = tensor.empty() : tensor<4x32x3072x8x1xf32> - %pack = tensor.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32> + %pack = linalg.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32> func.return %pack : tensor<4x32x3072x8x1xf32> } // CHECK-LABEL: func.func @bubble_up_permuted_pack_through_collapse // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x32x192x16x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %pack {{\[}}[0], [1], [2, 3], [4], [5]] : tensor<4x32x192x16x8x1xf32> into tensor<4x32x3072x8x1xf32> // CHECK: return %[[COLLAPSED]] : tensor<4x32x3072x8x1xf32> @@ -985,13 +985,13 @@ func.func @bubble_up_permuted_pack_through_collapse(%1: tensor<4x192x16x256xf32> func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> tensor<8x4x8x1xf32> { %collapsed = tensor.collapse_shape %1 [[0, 1, 2], [3]] : tensor<1x64x1x4xf32> into tensor<64x4xf32> %2 = tensor.empty() : tensor<8x4x8x1xf32> - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32> + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32> func.return %pack : tensor<8x4x8x1xf32> } // CHECK-LABEL: func.func @bubble_up_pack_through_unit_collapse // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x8x1x4x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1, 2], [3], [4], [5]] : tensor<1x8x1x4x8x1xf32> into tensor<8x4x8x1xf32> // CHECK: return %[[COLLAPSED]] : tensor<8x4x8x1xf32> @@ -1000,7 +1000,7 @@ func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> ten func.func 
@bubble_up_pack_through_collapse_on_outer_dims(%1: tensor, %dim : index) -> tensor { %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor %2 = tensor.empty(%dim) : tensor - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [4] into %2 : tensor -> tensor + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [4] into %2 : tensor -> tensor func.return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_collapse_on_outer_dims @@ -1009,7 +1009,7 @@ func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor, // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [4] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [4] into %[[EMPTY]] : tensor -> tensor // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3]] : tensor into tensor // CHECK: return %[[COLLAPSED]] : tensor @@ -1018,13 +1018,13 @@ func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor, func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4xf32>) -> tensor<384x32x8x8xf32> { %collapsed = tensor.collapse_shape %1 [[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32> %2 = tensor.empty() : tensor<384x32x8x8xf32> - %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32> + %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32> func.return %pack : tensor<384x32x8x8xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_through_non_divisible_collapse // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[COLLAPSED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[COLLAPSED]] // CHECK: return %[[PACK]] : tensor<384x32x8x8xf32> // ----- @@ -1032,13 +1032,13 @@ func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4 func.func @bubble_up_pack_outer_expanded_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x64x4xf32> { %empty = tensor.empty() : tensor<4x2x64x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [1] inner_tiles = [4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x64x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [1] inner_tiles = [4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x64x4xf32> return %pack : tensor<4x2x64x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_outer_expanded_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x64x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<8x64x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3]] // CHECK-SAME: output_shape [4, 2, 
64, 4] : tensor<8x64x4xf32> into tensor<4x2x64x4xf32> @@ -1049,13 +1049,13 @@ func.func @bubble_up_pack_outer_expanded_through_expand(%arg0: tensor<32x64xf32> func.func @bubble_up_pack_inner_expanded_through_expand(%arg0: tensor<32x64xf32>) -> tensor<32x4x4x4xf32> { %empty = tensor.empty() : tensor<32x4x4x4xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [2] inner_tiles = [4] into %empty : tensor<32x4x16xf32> -> tensor<32x4x4x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [2] inner_tiles = [4] into %empty : tensor<32x4x16xf32> -> tensor<32x4x4x4xf32> return %pack : tensor<32x4x4x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_inner_expanded_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x16x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64xf32> -> tensor<32x16x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3]] @@ -1067,13 +1067,13 @@ func.func @bubble_up_pack_inner_expanded_through_expand(%arg0: tensor<32x64xf32> func.func @bubble_up_pack_non_expanded_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<8x2x32x16x4xf32> { %empty = tensor.empty() : tensor<8x2x32x16x4xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2], [3]] output_shape [32, 2, 32, 16] : tensor<32x64x16xf32> into tensor<32x2x32x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [4] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x32x16x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [4] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x32x16x4xf32> return %pack : tensor<8x2x32x16x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_non_expanded_dims_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x64x16x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack +// CHECK: %[[PACK:.+]] = linalg.pack // CHECK-SAME: %[[ARG0]] inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x64x16x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3], [4]] @@ -1087,7 +1087,7 @@ func.func @bubble_up_pack_through_expand_dynamic(%arg0: tensor) -> ten %dim = tensor.dim %arg0, %c0 : tensor %empty = tensor.empty(%dim) : tensor %expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [%dim, 4, 16] : tensor into tensor - %pack = tensor.pack %expanded inner_dims_pos = [2] inner_tiles = [8] into %empty : tensor -> tensor + %pack = linalg.pack %expanded inner_dims_pos = [2] inner_tiles = [8] into %empty : tensor -> tensor return %pack : tensor } // CHECK-LABEL: func.func @bubble_up_pack_through_expand_dynamic( @@ -1095,7 +1095,7 @@ func.func @bubble_up_pack_through_expand_dynamic(%arg0: tensor) -> ten // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[DIM_INPUT:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM_INPUT]]) : tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [8] into %[[EMPTY]] // CHECK-SAME: : tensor -> tensor // CHECK: %[[DIM_PACK:.+]] = tensor.dim %[[PACK]], %[[C0]] : tensor @@ -1109,14 +1109,14 @@ 
func.func @bubble_up_pack_non_expanded_padding_through_expand(%arg0: tensor<32x6 %cst = arith.constant 3.000000e+00 : f32 %empty = tensor.empty() : tensor<4x2x8x4x8xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x60xf32> into tensor<4x8x60xf32> - %pack = tensor.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1, 2] inner_tiles = [4, 8] into %empty : tensor<4x8x60xf32> -> tensor<4x2x8x4x8xf32> + %pack = linalg.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1, 2] inner_tiles = [4, 8] into %empty : tensor<4x8x60xf32> -> tensor<4x2x8x4x8xf32> return %pack : tensor<4x2x8x4x8xf32> } // CHECK-LABEL: func.func @bubble_up_pack_non_expanded_padding_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-DAG: %[[CST:.+]] = arith.constant 3.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x8x4x8xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[CST]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[CST]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 8] into %[[EMPTY]] // CHECK-SAME: : tensor<32x60xf32> -> tensor<8x8x4x8xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] @@ -1128,13 +1128,13 @@ func.func @bubble_up_pack_non_expanded_padding_through_expand(%arg0: tensor<32x6 func.func @bubble_up_pack_outer_dims_perm_identity_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x32x4x2xf32> { %empty = tensor.empty() : tensor<4x2x32x4x2xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<4x2x32x4x2xf32> + %pack = linalg.pack %expanded outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<4x2x32x4x2xf32> return %pack : tensor<4x2x32x4x2xf32> } // CHECK-LABEL: func.func @bubble_up_pack_outer_dims_perm_identity_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32x4x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 2] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64xf32> -> tensor<8x32x4x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] @@ -1146,13 +1146,13 @@ func.func @bubble_up_pack_outer_dims_perm_identity_through_expand(%arg0: tensor< func.func @bubble_up_pack_multiple_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<8x2x4x8x4x8x2xf32> { %empty = tensor.empty() : tensor<8x2x4x8x4x8x2xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2], [3]] output_shape [32, 2, 32, 16] : tensor<32x64x16xf32> into tensor<32x2x32x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0, 2, 3] inner_tiles = [4, 8, 2] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x4x8x4x8x2xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0, 2, 3] inner_tiles = [4, 8, 2] into %empty : tensor<32x2x32x16xf32> -> tensor<8x2x4x8x4x8x2xf32> return %pack : tensor<8x2x4x8x4x8x2xf32> } // CHECK-LABEL: func.func @bubble_up_pack_multiple_dims_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x8x8x4x8x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack 
%[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1, 2] inner_tiles = [4, 8, 2] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x8x8x4x8x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0], [1, 2], [3], [4], [5], [6]] @@ -1164,13 +1164,13 @@ func.func @bubble_up_pack_multiple_dims_through_expand(%arg0: tensor<32x64x16xf3 func.func @bubble_up_pack_inner_dims_reorder_through_expand(%arg0: tensor<32x64xf32>) -> tensor<4x2x4x16x4xf32> { %empty = tensor.empty() : tensor<4x2x4x16x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [2, 1] inner_tiles = [16, 4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x4x16x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [2, 1] inner_tiles = [16, 4] into %empty : tensor<4x8x64xf32> -> tensor<4x2x4x16x4xf32> return %pack : tensor<4x2x4x16x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_inner_dims_reorder_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x4x16x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64xf32> -> tensor<8x4x16x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] @@ -1182,13 +1182,13 @@ func.func @bubble_up_pack_inner_dims_reorder_through_expand(%arg0: tensor<32x64x func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand(%arg0: tensor<32x64x16xf32>) -> tensor<4x2x2x8x16x4x4xf32> { %empty = tensor.empty() : tensor<4x2x2x8x16x4x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2, 3], [4]] output_shape [4, 8, 2, 32, 16] : tensor<32x64x16xf32> into tensor<4x8x2x32x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [1, 3] inner_tiles = [4, 4] into %empty : tensor<4x8x2x32x16xf32> -> tensor<4x2x2x8x16x4x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [1, 3] inner_tiles = [4, 4] into %empty : tensor<4x8x2x32x16xf32> -> tensor<4x2x2x8x16x4x4xf32> return %pack : tensor<4x2x2x8x16x4x4xf32> } // CHECK-LABEL: func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x16x16x4x4xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %[[EMPTY]] // CHECK-SAME: : tensor<32x64x16xf32> -> tensor<8x16x16x4x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[PACK]] {{\[}}[0, 1], [2, 3], [4], [5], [6]] @@ -1200,7 +1200,7 @@ func.func @bubble_up_pack_multiple_different_expanded_dims_through_expand(%arg0: func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor<32x64xf32>) -> tensor<32x4x2x4x2xf32> { %empty = tensor.empty() : tensor<32x4x2x4x2xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32> + %pack = linalg.pack %expanded outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %empty : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32> return %pack : tensor<32x4x2x4x2xf32> } // CHECK-LABEL: func.func 
@no_bubble_up_pack_outer_dims_permutation_through_expand( @@ -1208,7 +1208,7 @@ func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x4x2x4x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [4, 2] into %[[EMPTY]] // CHECK-SAME: : tensor<4x8x64xf32> -> tensor<32x4x2x4x2xf32> // CHECK: return %[[PACK]] : tensor<32x4x2x4x2xf32> @@ -1218,7 +1218,7 @@ func.func @no_bubble_up_pack_outer_dims_permutation_through_expand(%arg0: tensor func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: tensor<32x64xf32>) -> tensor<2x2x64x2x4xf32> { %empty = tensor.empty() : tensor<2x2x64x2x4xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32> return %pack : tensor<2x2x64x2x4xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand( @@ -1226,7 +1226,7 @@ func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: te // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x2x64x2x4xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %[[EMPTY]] // CHECK-SAME: : tensor<4x8x64xf32> -> tensor<2x2x64x2x4xf32> // CHECK: return %[[PACK]] : tensor<2x2x64x2x4xf32> @@ -1236,7 +1236,7 @@ func.func @no_bubble_up_pack_multiple_same_expanded_dim_through_expand(%arg0: te func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand(%arg0: tensor<32x64xf32>) -> tensor<2x8x64x2xf32> { %empty = tensor.empty() : tensor<2x8x64x2xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [2] into %empty : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [2] into %empty : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32> return %pack : tensor<2x8x64x2xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand( @@ -1244,7 +1244,7 @@ func.func @no_bubble_up_pack_non_innermost_expanded_dim_through_expand(%arg0: te // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x8x64x2xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [4, 8, 64] : tensor<32x64xf32> into tensor<4x8x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [2] into %[[EMPTY]] // CHECK-SAME: : tensor<4x8x64xf32> -> tensor<2x8x64x2xf32> // CHECK: return %[[PACK]] : tensor<2x8x64x2xf32> @@ -1255,7 +1255,7 @@ func.func 
@no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( %cst = arith.constant 3.000000e+00 : f32 %empty = tensor.empty() : tensor<3x2x60x8xf32> %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] output_shape [3, 10, 60] : tensor<30x60xf32> into tensor<3x10x60xf32> - %pack = tensor.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1] inner_tiles = [8] into %empty : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32> + %pack = linalg.pack %expanded padding_value(%cst : f32) inner_dims_pos = [1] inner_tiles = [8] into %empty : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32> return %pack : tensor<3x2x60x8xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( @@ -1264,7 +1264,7 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x2x60x8xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2]] // CHECK-SAME: output_shape [3, 10, 60] : tensor<30x60xf32> into tensor<3x10x60xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] padding_value(%[[CST]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] padding_value(%[[CST]] : f32) // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [8] into %[[EMPTY]] // CHECK-SAME: : tensor<3x10x60xf32> -> tensor<3x2x60x8xf32> // CHECK: return %[[PACK]] : tensor<3x2x60x8xf32> @@ -1274,7 +1274,7 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate(%arg0: tensor<32x64xf32>) -> tensor<8x4x16x8xf32> { %empty = tensor.empty() : tensor<8x4x16x8xf32> %expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> - %pack = tensor.pack %expanded inner_dims_pos = [0] inner_tiles = [8] into %empty : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> + %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [8] into %empty : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> return %pack : tensor<8x4x16x8xf32> } // CHECK-LABEL: func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate( @@ -1282,7 +1282,7 @@ func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassocia // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x4x16x8xf32> // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1, 2]] // CHECK-SAME: output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[EXPANDED]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] into %[[EMPTY]] // CHECK-SAME: : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> // CHECK: return %[[PACK]] : tensor<8x4x16x8xf32> @@ -1291,7 +1291,7 @@ func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassocia func.func @push_down_unpack_through_expand(%5: tensor, %dim: index, %sz0: index) -> tensor { %6 = tensor.empty(%dim) : tensor - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor + %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor into tensor func.return %expanded : tensor } @@ -1305,14 +1305,14 @@ func.func @push_down_unpack_through_expand(%5: tensor, %dim: index // CHECK: 
%[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]] output_shape [%[[SZ0]], 32, 32, 8, 8] : tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor // ----- func.func @push_down_unpack_through_expand_empty_outer_dims_perm(%5: tensor, %dim: index, %sz0: index) -> tensor { %6 = tensor.empty(%dim) : tensor - %unpack = tensor.unpack %5 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor + %unpack = linalg.unpack %5 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor into tensor func.return %expanded : tensor } @@ -1326,14 +1326,14 @@ func.func @push_down_unpack_through_expand_empty_outer_dims_perm(%5: tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor // ----- func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32>) -> tensor<4x12x256x256xf32> { %6 = tensor.empty() : tensor<4x3072x256xf32> - %unpack = tensor.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32> + %unpack = linalg.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32> %expanded = tensor.expand_shape %unpack [[0], [1, 2], [3]] output_shape [4, 12, 256, 256] : tensor<4x3072x256xf32> into tensor<4x12x256x256xf32> func.return %expanded : tensor<4x12x256x256xf32> } @@ -1341,14 +1341,14 @@ func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32> // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3], [4], [5]] output_shape [4, 32, 12, 32, 8, 8] : tensor<4x32x384x8x8xf32> into tensor<4x32x12x32x8x8xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x12x256x256xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32> // CHECK: return %[[UNPACK]] : tensor<4x12x256x256xf32> // ----- func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> tensor<3x16x1x256xf32> { %6 = tensor.empty() : tensor<48x256xf32> - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32> + %unpack =
linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32> %expanded = tensor.expand_shape %unpack [[0, 1, 2], [3]] output_shape [3, 16, 1, 256] : tensor<48x256xf32> into tensor<3x16x1x256xf32> func.return %expanded : tensor<3x16x1x256xf32> } @@ -1356,14 +1356,14 @@ func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> ten // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1, 2], [3], [4], [5]] output_shape [3, 2, 1, 32, 8, 8] : tensor<6x32x8x8xf32> into tensor<3x2x1x32x8x8xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x16x1x256xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32> // CHECK: return %[[UNPACK]] : tensor<3x16x1x256xf32> // ----- func.func @push_down_unpack_through_expand_on_outer_dims(%5: tensor, %dim: index, %sz0: index) -> tensor { %6 = tensor.empty(%dim) : tensor - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %6 : tensor -> tensor + %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %6 : tensor -> tensor %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%sz0, 256, 256] : tensor into tensor func.return %expanded : tensor } @@ -1377,19 +1377,19 @@ func.func @push_down_unpack_through_expand_on_outer_dims(%5: tensor, // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3]] output_shape [%[[SZ0]], 256, 32, 8] : tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor // ----- func.func @no_push_down_unpack_through_non_divisible_expand(%5: tensor<384x32x8x8xf32>) -> tensor<256x12x256xf32> { %6 = tensor.empty() : tensor<3072x256xf32> - %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32> + %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32> %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [256, 12, 256] : tensor<3072x256xf32> into tensor<256x12x256xf32> func.return %expanded : tensor<256x12x256xf32> } // CHECK-LABEL: func.func @no_push_down_unpack_through_non_divisible_expand // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[UNPACK]] {{\[}}[0, 1], [2]] output_shape [256, 12, 256] : tensor<3072x256xf32> into tensor<256x12x256xf32> // CHECK: return %[[EXPANDED]] : tensor<256x12x256xf32> diff --git
a/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir index ec761d9a49436..72fde5490a305 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-pack-tile.mlir @@ -4,7 +4,7 @@ // RUN: -transform-interpreter %s | FileCheck %s func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8x32xf32>) -> tensor<1x1x4x8x8x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> return %0 : tensor<1x1x4x8x8x32xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> @@ -27,7 +27,7 @@ func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -36,7 +36,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %arg2: f32) -> tensor<2x8x8x2xf32> { - %0 = tensor.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> + %0 = linalg.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> return %0 : tensor<2x8x8x2xf32> } // CHECK: func.func @pad_and_pack @@ -54,7 +54,7 @@ func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %a module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -64,7 +64,7 @@ module attributes {transform.with_named_sequence} { func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> return %0 : tensor<32x4x32x8xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> @@ -85,7 +85,7 @@ func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) // CHECK-SAME: [%[[C]], %[[K]], 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x32x8xf32> into tensor<32x4x32x8xf32> module attributes {transform.with_named_sequence} { 
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir index 1cc1484ed4095..911b453f919c3 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-pack.mlir @@ -5,7 +5,7 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor, %arg1: tensor<1x1x?x1xi32>) -> tensor<1x1x?x1xi32> { %c8 = arith.constant 8 : index %c5 = arith.constant 5 : i32 - %pack = tensor.pack %arg0 padding_value(%c5 : i32) inner_dims_pos = [0, 1] inner_tiles = [%c8, 1] into %arg1 : tensor -> tensor<1x1x?x1xi32> + %pack = linalg.pack %arg0 padding_value(%c5 : i32) inner_dims_pos = [0, 1] inner_tiles = [%c8, 1] into %arg1 : tensor -> tensor<1x1x?x1xi32> return %pack : tensor<1x1x?x1xi32> } @@ -32,7 +32,7 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor, %arg1: tensor<1x1x?x1xi // ----- func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32> return %0 : tensor<1x1x8x2xf32> } // CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 - 5)> @@ -52,7 +52,7 @@ func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: te /// Same as example above, but with 1 dynamic tile size. 
func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32, %tile_dim_0: index) -> tensor<1x1x?x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> return %0 : tensor<1x1x?x2xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile( @@ -72,7 +72,7 @@ func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: te func.func @simple_pad_and_pack_dynamic_tile_cst(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32) -> tensor<1x1x?x2xf32> { %tile_dim_0 = arith.constant 8 : index - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> return %0 : tensor<1x1x?x2xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile_cst( @@ -86,7 +86,7 @@ func.func @simple_pad_and_pack_dynamic_tile_cst(%input: tensor<5x1xf32>, %output // CHECK: return %[[RES]] : tensor<1x1x?x2xf32> func.func @simple_pad_and_pack_dynamic_tile_transpose(%input: tensor<5x1xf32>, %output: tensor<1x1x2x?xf32>, %pad: f32, %tile_dim_1: index) -> tensor<1x1x2x?xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x2x?xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x2x?xf32> return %0 : tensor<1x1x2x?xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile_transpose( @@ -116,7 +116,7 @@ func.func @simple_pad_and_pack_scalable_tile(%input: tensor<5x1xf32>, %output: t %c8 = arith.constant 8 : index %vscale = vector.vscale %c8_vscale = arith.muli %vscale, %c8 : index - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32> return %0 : tensor<1x1x?x2xf32> } @@ -138,7 +138,7 @@ func.func @simple_pad_and_pack_scalable_tile(%input: tensor<5x1xf32>, %output: t /// Same as example above, but with both tile sizes dynamic. 
func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x?x?xf32>, %pad: f32, %tile_dim_0: index, %tile_dim_1: index) -> tensor<1x1x?x?xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x?x?xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_dim_0, %tile_dim_1] into %output : tensor<5x1xf32> -> tensor<1x1x?x?xf32> return %0 : tensor<1x1x?x?xf32> } // CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tiles( @@ -158,7 +158,7 @@ func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: t // ----- func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x1x5x1xf32>, %output: tensor<1x1x1x1x2x?xf32>, %pad: f32, %high: index) -> tensor<1x1x1x1x2x?xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x1x5x1xf32> -> tensor<1x1x1x1x2x?xf32> + %0 = linalg.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x1x5x1xf32> -> tensor<1x1x1x1x2x?xf32> return %0 : tensor<1x1x1x1x2x?xf32> } // CHECK: #[[$ATTR_2:.+]] = affine_map<()[s0] -> (s0 - 5)> @@ -183,7 +183,7 @@ func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x // ----- func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{ - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-LABEL: func.func @simple_NC_to_CNnc @@ -197,7 +197,7 @@ func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32 // ----- func.func @simple_CHW_to_CHWhwc(%arg0: tensor<3x5x7xf32>, %arg1: tensor<1x1x1x5x7x3xf32>) -> tensor<1x1x1x5x7x3xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [1, 2, 0] inner_tiles = [5, 7, 3] into %arg1 : tensor<3x5x7xf32> -> tensor<1x1x1x5x7x3xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1, 2, 0] inner_tiles = [5, 7, 3] into %arg1 : tensor<3x5x7xf32> -> tensor<1x1x1x5x7x3xf32> return %0 : tensor<1x1x1x5x7x3xf32> } // CHECK-LABEL: func.func @simple_CHW_to_CHWhwc @@ -215,7 +215,7 @@ func.func @simple_CHW_to_CHWhwc(%arg0: tensor<3x5x7xf32>, %arg1: tensor<1x1x1x5x // ----- func.func @simple_KCRS_to_KRSCsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x1x1x8x32xf32>) -> tensor<1x1x1x1x8x32xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> return %0 : tensor<1x1x1x1x8x32xf32> } // CHECK-LABEL: func.func @simple_KCRS_to_KRSCsr diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir b/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir index 0dbdf470bbfc9..03437223f0d45 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-unpack-tile.mlir @@ -4,13 +4,13 @@ // RUN: 
-transform-interpreter %s | FileCheck %s func.func @KCRSsr_to_KCRS(%arg0: tensor<1x1x4x8x8x32xf32>, %arg1: tensor<1x1x128x64xf32>) -> tensor<1x1x128x64xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x4x8x8x32xf32> -> tensor<1x1x128x64xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x4x8x8x32xf32> -> tensor<1x1x128x64xf32> return %0 : tensor<1x1x128x64xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -38,7 +38,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13x15xf32>) -> tensor<13x15xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> return %0 : tensor<13x15xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (-d0 + 13, 8)> @@ -70,7 +70,7 @@ func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [8, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } @@ -79,7 +79,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) -> tensor<128x256xf32> { - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> return %0 : tensor<128x256xf32> } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> @@ -102,7 +102,7 @@ func.func @CKkc_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [32, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.yield } diff --git a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir 
b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir index ba1f214952562..d460c506d6e18 100644 --- a/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir +++ b/mlir/test/Dialect/Linalg/decompose-tensor-unpack.mlir @@ -3,7 +3,7 @@ // RUN: -transform-interpreter=entry-point=decompose_unpack %s | FileCheck %s func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-LABEL: func.func @simple_KCRSsr_to_KCRS @@ -22,7 +22,7 @@ func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor< // ----- func.func @simple_unpack_static_tiles(%input: tensor<1x1x8x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> { - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_static_tiles @@ -38,7 +38,7 @@ func.func @simple_unpack_static_tiles(%input: tensor<1x1x8x2xf32>, %output: tens /// Same as example above, but with 1 dynamic tile size. func.func @simple_unpack_dynamic_tile(%input: tensor<1x1x?x2xf32>, %output: tensor<5x1xf32>, %tile_dim: index) -> tensor<5x1xf32> { - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%tile_dim, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%tile_dim, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_dynamic_tile @@ -55,7 +55,7 @@ func.func @simple_unpack_dynamic_tile(%input: tensor<1x1x?x2xf32>, %output: tens /// Same as example above, but with 1 dynamic tile size and a transpose. func.func @simple_unpack_dynamic_tile_transpose(%src: tensor<1x1x2x?xf32>, %dest: tensor<5x1xf32>, %tile_dim: index) -> tensor<5x1xf32> { - %0 = tensor.unpack %src inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim] into %dest : tensor<1x1x2x?xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %src inner_dims_pos = [1, 0] inner_tiles = [2, %tile_dim] into %dest : tensor<1x1x2x?xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_dynamic_tile_transpose @@ -78,7 +78,7 @@ func.func @simple_unpack_scalable_tile(%input: tensor<1x1x?x2xf32>, %output: ten %c8 = arith.constant 8 : index %vscale = vector.vscale %c8_vscale = arith.muli %vscale, %c8 : index - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [%c8_vscale, 2] into %output : tensor<1x1x?x2xf32> -> tensor<5x1xf32> return %0 : tensor<5x1xf32> } // CHECK-LABEL: func.func @simple_unpack_scalable_tile @@ -97,7 +97,7 @@ func.func @simple_unpack_scalable_tile(%input: tensor<1x1x?x2xf32>, %output: ten // ----- func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32>) -> tensor<32x8xf32>{ - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32>
-> tensor<32x8xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32> return %0 : tensor<32x8xf32> } // CHECK-LABEL: func.func @simple_CNnc_to_NC @@ -112,7 +112,7 @@ func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32 // ----- func.func @simple_NCHWc_to_NCHW(%arg0: tensor<2x1x16x8x32xf32>, %arg1: tensor<2x32x16x8xf32>) -> tensor<2x32x16x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %arg1 : tensor<2x1x16x8x32xf32> -> tensor<2x32x16x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %arg1 : tensor<2x1x16x8x32xf32> -> tensor<2x32x16x8xf32> return %0 : tensor<2x32x16x8xf32> } // CHECK-LABEL: func.func @simple_NCHWc_to_NCHW @@ -131,7 +131,7 @@ func.func @simple_NCHWc_to_NCHW(%arg0: tensor<2x1x16x8x32xf32>, %arg1: tensor<2x // ----- func.func @simple_NHWC_to_NCHW(%arg0: tensor<1x16x8x32xf32>, %arg1: tensor<1x32x16x8xf32>) -> tensor<1x32x16x8xf32> { - %0 = tensor.unpack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [] inner_tiles = [] into %arg1 : tensor<1x16x8x32xf32> -> tensor<1x32x16x8xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [0, 2, 3, 1] inner_dims_pos = [] inner_tiles = [] into %arg1 : tensor<1x16x8x32xf32> -> tensor<1x32x16x8xf32> return %0 : tensor<1x32x16x8xf32> } // CHECK-LABEL: func.func @simple_NHWC_to_NCHW @@ -150,7 +150,7 @@ func.func @simple_NHWC_to_NCHW(%arg0: tensor<1x16x8x32xf32>, %arg1: tensor<1x32x // ----- func.func @unpack_with_dynamic_dims(%arg0: tensor<?x1x1x1x8x32xf32>, %arg1: tensor<?x1x32x8xf32>) -> tensor<?x1x32x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<?x1x1x1x8x32xf32> -> tensor<?x1x32x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<?x1x1x1x8x32xf32> -> tensor<?x1x32x8xf32> return %0 : tensor<?x1x32x8xf32> } // CHECK-LABEL: func.func @unpack_with_dynamic_dims diff --git a/mlir/test/Dialect/Linalg/fold-empty-op.mlir b/mlir/test/Dialect/Linalg/fold-empty-op.mlir new file mode 100644 index 0000000000000..5ce19d7091318 --- /dev/null +++ b/mlir/test/Dialect/Linalg/fold-empty-op.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt -split-input-file -transform-interpreter %s | FileCheck %s + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func"> + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.fold_pack_unpack_into_empty + } : !transform.op<"func.func"> + transform.yield + } +} + +func.func @pack_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { + %empty_unpacked = tensor.empty() : tensor<256x256xf32> + %packed = linalg.pack %empty_unpacked + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> + return %packed : tensor<8x8x32x32xf32> +} + +// CHECK-LABEL: func.func @pack_empty( +// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> +// CHECK-NOT: linalg.pack +// CHECK: return %[[T]] : tensor<8x8x32x32xf32> + +func.func @pack_empty_dynamic(%arg0: tensor<?x?x32x32xf32>, %dim0: index, %dim1: index) -> tensor<?x?x32x32xf32> { + %empty_unpacked = tensor.empty(%dim0, %dim1) : tensor<?x?xf32> + %packed = linalg.pack %empty_unpacked + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<?x?xf32> -> tensor<?x?x32x32xf32> + return %packed : tensor<?x?x32x32xf32> +} + +// CHECK-LABEL: func.func @pack_empty_dynamic( +// CHECK-SAME: %[[T:.+]]: tensor<?x?x32x32xf32>, +// CHECK-SAME:
%[[DIM0:[a-zA-Z0-9_]+]]: index, +// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index +// CHECK-NOT: linalg.pack +// CHECK: return %[[T]] : tensor<?x?x32x32xf32> + +func.func @unpack_empty(%arg0: tensor<256x256xf32>) -> tensor<256x256xf32> { + %empty_packed = tensor.empty() : tensor<8x8x32x32xf32> + %unpacked = linalg.unpack %empty_packed + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<8x8x32x32xf32> -> tensor<256x256xf32> + return %unpacked : tensor<256x256xf32> +} + +// CHECK-LABEL: func.func @unpack_empty( +// CHECK-SAME: %[[T:.+]]: tensor<256x256xf32> +// CHECK-NOT: linalg.unpack +// CHECK: return %[[T]] : tensor<256x256xf32> + +func.func @unpack_empty_dynamic(%arg0: tensor<?x?xf32>, %dim0: index, %dim1: index) -> tensor<?x?xf32> { + %empty_packed = tensor.empty(%dim0, %dim1) : tensor<?x?x32x32xf32> + %unpacked = linalg.unpack %empty_packed + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<?x?x32x32xf32> -> tensor<?x?xf32> + return %unpacked : tensor<?x?xf32> +} + +// CHECK-LABEL: func.func @unpack_empty_dynamic( +// CHECK-SAME: %[[T:.+]]: tensor<?x?xf32>, +// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index, +// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index +// CHECK-NOT: linalg.unpack +// CHECK: return %[[T]] : tensor<?x?xf32> + +func.func @pack_padded_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { + %pad = arith.constant 1.0 : f32 + %empty_unpacked = tensor.empty() : tensor<256x256xf32> + %packed = linalg.pack %empty_unpacked + padding_value(%pad : f32) + inner_dims_pos = [0, 1] inner_tiles = [32, 32] + into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> + return %packed : tensor<8x8x32x32xf32> +} + +// CHECK-LABEL: func.func @pack_padded_empty( +// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> +// CHECK: %[[PACK:.+]] = linalg.pack +// CHECK: return %[[PACK]] : tensor<8x8x32x32xf32> diff --git a/mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir similarity index 86% rename from mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir rename to mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir index f9e51ae52a74b..51350e5bc8498 100644 --- a/mlir/test/Dialect/Tensor/simplify-pack-unpack.mlir +++ b/mlir/test/Dialect/Linalg/simplify-pack-unpack.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -test-tensor-transform-patterns="test-simplify-pack-unpack-patterns" %s | FileCheck %s +// RUN: mlir-opt -split-input-file -test-linalg-transform-patterns="test-simplify-pack-unpack-patterns" %s | FileCheck %s // CHECK-LABEL: func.func @single_dim_packing( // CHECK-SAME: %[[ARG0:.+]]: tensor<256xf32>) @@ -6,7 +6,7 @@ // CHECK: return %[[EXPANDED]] : tensor<8x32xf32> func.func @single_dim_packing(%arg0: tensor<256xf32>) -> tensor<8x32xf32> { %empty = tensor.empty() : tensor<8x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256xf32> -> tensor<8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256xf32> -> tensor<8x32xf32> return %0 : tensor<8x32xf32> } @@ -15,11 +15,11 @@ func.func @single_dim_packing(%arg0: tensor<256xf32>) -> tensor<8x32xf32> { // CHECK-LABEL: func.func @single_dim_packing_with_padding( // CHECK-SAME: %[[ARG0:.+]]: tensor<255xf32>) // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @single_dim_packing_with_padding(%arg0: tensor<255xf32>) -> tensor<8x32xf32> { %empty = tensor.empty() : tensor<8x32xf32> %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.pack %arg0 padding_value(%cst : f32) inner_dims_pos = [0] inner_tiles = [32] into %empty :
tensor<255xf32> -> tensor<8x32xf32> + %0 = linalg.pack %arg0 padding_value(%cst : f32) inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<255xf32> -> tensor<8x32xf32> return %0 : tensor<8x32xf32> } @@ -31,7 +31,7 @@ func.func @single_dim_packing_with_padding(%arg0: tensor<255xf32>) -> tensor<8x3 // CHECK: return %[[EXPANDED]] : tensor<5x8x32xf32> func.func @single_last_inner_dim_packing(%arg0: tensor<5x256xf32>) -> tensor<5x8x32xf32> { %empty = tensor.empty() : tensor<5x8x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> return %0 : tensor<5x8x32xf32> } @@ -43,7 +43,7 @@ func.func @single_last_inner_dim_packing(%arg0: tensor<5x256xf32>) -> tensor<5x8 // CHECK: return %[[EXPANDED]] : tensor<2x32xf32> func.func @pack_1d_with_outer_dims_perm(%arg0: tensor<64xf32>) -> tensor<2x32xf32> { %empty = tensor.empty() : tensor<2x32xf32> - %pack = tensor.pack %arg0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<64xf32> -> tensor<2x32xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<64xf32> -> tensor<2x32xf32> return %pack : tensor<2x32xf32> } @@ -55,7 +55,7 @@ func.func @pack_1d_with_outer_dims_perm(%arg0: tensor<64xf32>) -> tensor<2x32xf3 // CHECK: return %[[EXPANDED]] : tensor<5x8x32xf32> func.func @single_last_inner_dim_packing_with_identity_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<5x8x32xf32> { %empty = tensor.empty() : tensor<5x8x32xf32> - %0 = tensor.pack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<5x8x32xf32> return %0 : tensor<5x8x32xf32> } @@ -63,10 +63,10 @@ func.func @single_last_inner_dim_packing_with_identity_outer_dims_perm(%arg0: te // CHECK-LABEL: func.func @packing_with_outer_dims_perm( // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @packing_with_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<8x5x32xf32> { %empty = tensor.empty() : tensor<8x5x32xf32> - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<8x5x32xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x256xf32> -> tensor<8x5x32xf32> return %0 : tensor<8x5x32xf32> } @@ -74,10 +74,10 @@ func.func @packing_with_outer_dims_perm(%arg0: tensor<5x256xf32>) -> tensor<8x5x // CHECK-LABEL: func.func @single_first_inner_dim_packing( // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @single_first_inner_dim_packing(%arg0: tensor<256x5xf32>) -> tensor<8x5x32xf32> { %empty = tensor.empty() : tensor<8x5x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256x5xf32> -> tensor<8x5x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<256x5xf32> -> tensor<8x5x32xf32> return %0 : tensor<8x5x32xf32> } @@ -89,7 +89,7 @@ func.func @single_first_inner_dim_packing(%arg0: tensor<256x5xf32>) -> tensor<8x // CHECK: return %[[EXPANDED]] func.func @pack_1x32_to_1x32x1x1(%arg0 : tensor<1x32xf32>) -> tensor<1x32x1x1xf32> { 
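  // Hedged note (an assumption for illustration, not this file's checked
  // output): with all-unit inner tiles and no transposition, the
  // simplification pattern is expected to rewrite this pack as a pure
  // reshape, e.g.
  //   %e = tensor.expand_shape %arg0 [[0], [1, 2, 3]] output_shape [1, 32, 1, 1]
  //       : tensor<1x32xf32> into tensor<1x32x1x1xf32>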
%empty = tensor.empty() : tensor<1x32x1x1xf32> - %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty + %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty : tensor<1x32xf32> -> tensor<1x32x1x1xf32> return %pack : tensor<1x32x1x1xf32> } @@ -102,7 +102,7 @@ func.func @pack_1x32_to_1x32x1x1(%arg0 : tensor<1x32xf32>) -> tensor<1x32x1x1xf3 // CHECK: return %[[EXPANDED]] func.func @pack_1x32_to_1x16x1x2(%arg0 : tensor<1x32xf32>) -> tensor<1x16x1x2xf32> { %empty = tensor.empty() : tensor<1x16x1x2xf32> - %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 2] into %empty + %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 2] into %empty : tensor<1x32xf32> -> tensor<1x16x1x2xf32> return %pack : tensor<1x16x1x2xf32> } @@ -115,7 +115,7 @@ func.func @pack_1x32_to_1x16x1x2(%arg0 : tensor<1x32xf32>) -> tensor<1x16x1x2xf3 // CHECK: return %[[EXPANDED]] func.func @pack_32x1_to_16x1x2x1(%arg0 : tensor<32x1xf32>) -> tensor<1x16x2x1xf32> { %empty = tensor.empty() : tensor<1x16x2x1xf32> - %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty + %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty : tensor<32x1xf32> -> tensor<1x16x2x1xf32> return %pack : tensor<1x16x2x1xf32> } @@ -124,10 +124,10 @@ func.func @pack_32x1_to_16x1x2x1(%arg0 : tensor<32x1xf32>) -> tensor<1x16x2x1xf3 // CHECK-LABEL: func.func @pack_32x1_to_16x1x1x2 // CHECK-NOT: tensor.expand_shape -// CHECK: tensor.pack +// CHECK: linalg.pack func.func @pack_32x1_to_16x1x1x2(%arg0 : tensor<32x1xf32>) -> tensor<16x1x1x2xf32> { %empty = tensor.empty() : tensor<16x1x1x2xf32> - %pack = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty + %pack = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty : tensor<32x1xf32> -> tensor<16x1x1x2xf32> return %pack : tensor<16x1x1x2xf32> } @@ -140,7 +140,7 @@ func.func @pack_32x1_to_16x1x1x2(%arg0 : tensor<32x1xf32>) -> tensor<16x1x1x2xf3 // CHECK: return %[[COLLAPSED]] func.func @unpack_1d_to_collapse(%arg0: tensor<8x32xf32>) -> tensor<256xf32> { %empty = tensor.empty() : tensor<256xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<256xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<256xf32> return %0 : tensor<256xf32> } @@ -148,10 +148,10 @@ func.func @unpack_1d_to_collapse(%arg0: tensor<8x32xf32>) -> tensor<256xf32> { // CHECK-LABEL: func.func @unpack_to_partial_slice // CHECK-NOT: tensor.collapse -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_to_partial_slice(%arg0: tensor<8x32xf32>) -> tensor<255xf32> { %empty = tensor.empty() : tensor<255xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<255xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x32xf32> -> tensor<255xf32> return %0 : tensor<255xf32> } @@ -159,14 +159,14 @@ func.func @unpack_to_partial_slice(%arg0: tensor<8x32xf32>) -> tensor<255xf32> { // CHECK-LABEL: func.func @unpack_dynamic // CHECK-NOT: tensor.collapse -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_dynamic(%arg0: tensor<?x32xf32>) -> tensor<?xf32> { %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index %d0 = tensor.dim %arg0, %c0 : tensor<?x32xf32> %size = arith.muli %d0, %c32 : index %empty =
tensor.empty(%size) : tensor<?xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<?x32xf32> -> tensor<?xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<?x32xf32> -> tensor<?xf32> return %0 : tensor<?xf32> } @@ -178,7 +178,7 @@ func.func @unpack_dynamic(%arg0: tensor<?x32xf32>) -> tensor<?xf32> { // CHECK: return %[[COLLAPSED]] : tensor<5x256xf32> func.func @single_last_inner_dim_unpacking(%arg0: tensor<5x8x32xf32>) -> tensor<5x256xf32> { %empty = tensor.empty() : tensor<5x256xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> return %0 : tensor<5x256xf32> } @@ -190,7 +190,7 @@ func.func @single_last_inner_dim_unpacking(%arg0: tensor<5x8x32xf32>) -> tensor< // CHECK: return %[[COLLAPSED]] : tensor<5x256xf32> func.func @single_last_inner_dim_unpacking_with_identity_outer_dims_perm(%arg0: tensor<5x8x32xf32>) -> tensor<5x256xf32> { %empty = tensor.empty() : tensor<5x256xf32> - %0 = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<5x8x32xf32> -> tensor<5x256xf32> return %0 : tensor<5x256xf32> } @@ -198,10 +198,10 @@ func.func @single_last_inner_dim_unpacking_with_identity_outer_dims_perm(%arg0: // CHECK-LABEL: func.func @unpacking_with_outer_dims_perm( // CHECK-NOT: tensor.collapse_shape -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpacking_with_outer_dims_perm(%arg0: tensor<8x5x32xf32>) -> tensor<5x256xf32> { %empty = tensor.empty() : tensor<5x256xf32> - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<5x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<5x256xf32> return %0 : tensor<5x256xf32> } @@ -209,10 +209,10 @@ func.func @unpacking_with_outer_dims_perm(%arg0: tensor<8x5x32xf32>) -> tensor<5 // CHECK-LABEL: func.func @single_first_inner_dim_unpacking( // CHECK-NOT: tensor.collapse_shape -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @single_first_inner_dim_unpacking(%arg0: tensor<8x5x32xf32>) -> tensor<256x5xf32> { %empty = tensor.empty() : tensor<256x5xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<256x5xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [32] into %empty : tensor<8x5x32xf32> -> tensor<256x5xf32> return %0 : tensor<256x5xf32> } @@ -224,7 +224,7 @@ func.func @single_first_inner_dim_unpacking(%arg0: tensor<8x5x32xf32>) -> tensor // CHECK: return %[[COLLAPSED]] func.func @unpack_1x32x1x1_to_1x32(%arg0 : tensor<1x32x1x1xf32>) -> tensor<1x32xf32> { %empty = tensor.empty() : tensor<1x32xf32> - %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty + %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [1, 1] into %empty : tensor<1x32x1x1xf32> -> tensor<1x32xf32> return %unpack : tensor<1x32xf32> } @@ -237,7 +237,7 @@ func.func @unpack_1x32x1x1_to_1x32(%arg0 : tensor<1x32x1x1xf32>) -> tensor<1x32x // CHECK: return %[[COLLAPSED]] func.func @unpack_1x2x1x16_to_1x32(%arg0 : tensor<1x2x1x16xf32>) -> tensor<1x32xf32> { %empty =
tensor.empty() : tensor<1x32xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 16] into %empty + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 16] into %empty : tensor<1x2x1x16xf32> -> tensor<1x32xf32> return %unpack : tensor<1x32xf32> } @@ -250,7 +250,7 @@ func.func @unpack_1x2x1x16_to_1x32(%arg0 : tensor<1x2x1x16xf32>) -> tensor<1x32x // CHECK: return %[[COLLAPSED]] func.func @unpack_16x1x2x1_to_32x1(%arg0 : tensor<1x16x2x1xf32>) -> tensor<32x1xf32> { %empty = tensor.empty() : tensor<32x1xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty + %unpack = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 1] into %empty : tensor<1x16x2x1xf32> -> tensor<32x1xf32> return %unpack : tensor<32x1xf32> } @@ -259,10 +259,10 @@ func.func @unpack_16x1x2x1_to_32x1(%arg0 : tensor<1x16x2x1xf32>) -> tensor<32x1x // CHECK-LABEL: func.func @unpack_16x1x1x2_to_32x1 // CHECK-NOT: tensor.collapse_shape -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_16x1x1x2_to_32x1(%arg0 : tensor<16x1x1x2xf32>) -> tensor<32x1xf32> { %empty = tensor.empty() : tensor<32x1xf32> - %unpack = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty + %unpack = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [1, 2] into %empty : tensor<16x1x1x2xf32> -> tensor<32x1xf32> return %unpack : tensor<32x1xf32> } @@ -275,7 +275,7 @@ func.func @unpack_16x1x1x2_to_32x1(%arg0 : tensor<16x1x1x2xf32>) -> tensor<32x1x // CHECK: return %[[EXPANDED]] : tensor<1x1x32x64xf32> func.func @pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> { %empty = tensor.empty() : tensor<1x1x32x64xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> return %0 : tensor<1x1x32x64xf32> } @@ -287,7 +287,7 @@ func.func @pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> { // CHECK: return %[[EXPANDED]] : tensor<1x1x32x64xf32> func.func @pad_like_pack_with_outer_dims_perm(%arg0: tensor<32x64xf32>) -> tensor<1x1x32x64xf32> { %empty = tensor.empty() : tensor<1x1x32x64xf32> - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<32x64xf32> -> tensor<1x1x32x64xf32> return %0 : tensor<1x1x32x64xf32> } @@ -299,7 +299,7 @@ func.func @pad_like_pack_with_outer_dims_perm(%arg0: tensor<32x64xf32>) -> tenso // CHECK: return %[[EXPANDED]] : tensor<32x1x64xf32> func.func @inner_pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<32x1x64xf32> { %empty = tensor.empty() : tensor<32x1x64xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64xf32> -> tensor<32x1x64xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64xf32> -> tensor<32x1x64xf32> return %0 : tensor<32x1x64xf32> } @@ -309,11 +309,11 @@ func.func @inner_pad_like_pack(%arg0: tensor<32x64xf32>) -> tensor<32x1x64xf32> // CHECK-LABEL: func.func @pad_and_inner_dim_shuffle_pack( // CHECK-SAME: %[[ARG0:.+]]: tensor<32x64xf32>) // CHECK: %[[EMPTY:.+]] = 
tensor.empty() : tensor<1x1x64x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<1x1x64x32xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %[[EMPTY]] : tensor<32x64xf32> -> tensor<1x1x64x32xf32> // CHECK: return %[[PACK]] : tensor<1x1x64x32xf32> func.func @pad_and_inner_dim_shuffle_pack(%arg0: tensor<32x64xf32>) -> tensor<1x1x64x32xf32> { %empty = tensor.empty() : tensor<1x1x64x32xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %empty : tensor<32x64xf32> -> tensor<1x1x64x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [64, 32] into %empty : tensor<32x64xf32> -> tensor<1x1x64x32xf32> return %0 : tensor<1x1x64x32xf32> } @@ -323,11 +323,11 @@ func.func @pad_and_inner_dim_shuffle_pack(%arg0: tensor<32x64xf32>) -> tensor<1x // CHECK-LABEL: func.func @pad_like_pack_with_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<32x64x16xf32>) // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x1x16x64xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> // CHECK: return %[[PACK]] : tensor<32x1x16x64xf32> func.func @pad_like_pack_with_transpose(%arg0: tensor<32x64x16xf32>) -> tensor<32x1x16x64xf32> { %empty = tensor.empty() : tensor<32x1x16x64xf32> - %0 = tensor.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x64x16xf32> -> tensor<32x1x16x64xf32> return %0 : tensor<32x1x16x64xf32> } @@ -339,7 +339,7 @@ func.func @pad_like_pack_with_transpose(%arg0: tensor<32x64x16xf32>) -> tensor<3 // CHECK: return %[[COLLAPSED]] : tensor<32x64xf32> func.func @unpad_like_unpack(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> { %empty = tensor.empty() : tensor<32x64xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> return %0 : tensor<32x64xf32> } @@ -351,7 +351,7 @@ func.func @unpad_like_unpack(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> // CHECK: return %[[COLLAPSED]] : tensor<32x64xf32> func.func @unpad_like_unpack_with_outer_dims_perm(%arg0: tensor<1x1x32x64xf32>) -> tensor<32x64xf32> { %empty = tensor.empty() : tensor<32x64xf32> - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<32x64xf32> return %0 : tensor<32x64xf32> } @@ -363,7 +363,7 @@ func.func @unpad_like_unpack_with_outer_dims_perm(%arg0: tensor<1x1x32x64xf32>) // CHECK: return %[[COLLAPSED]] : tensor<32x64xf32> func.func @inner_unpad_like_unpack(%arg0: tensor<32x1x64xf32>) -> tensor<32x64xf32> { %empty = tensor.empty() : tensor<32x64xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x64xf32> -> tensor<32x64xf32> + %0 = linalg.unpack 
%arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x64xf32> -> tensor<32x64xf32> return %0 : tensor<32x64xf32> } @@ -373,11 +373,11 @@ func.func @inner_unpad_like_unpack(%arg0: tensor<32x1x64xf32>) -> tensor<32x64xf // CHECK-LABEL: func.func @unpad_and_inner_dim_shuffle_pack( // CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x32x64xf32>) // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<64x32xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %[[EMPTY]] : tensor<1x1x32x64xf32> -> tensor<64x32xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %[[EMPTY]] : tensor<1x1x32x64xf32> -> tensor<64x32xf32> // CHECK: return %[[UNPACK]] : tensor<64x32xf32> func.func @unpad_and_inner_dim_shuffle_pack(%arg0: tensor<1x1x32x64xf32>) -> tensor<64x32xf32> { %empty = tensor.empty() : tensor<64x32xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<64x32xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 64] into %empty : tensor<1x1x32x64xf32> -> tensor<64x32xf32> return %0 : tensor<64x32xf32> } @@ -387,10 +387,10 @@ func.func @unpad_and_inner_dim_shuffle_pack(%arg0: tensor<1x1x32x64xf32>) -> ten // CHECK-LABEL: func.func @unpad_like_unpack_with_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<32x1x16x64xf32>) // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x64x16xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [1] inner_tiles = [64] into %[[EMPTY]] : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> // CHECK: return %[[UNPACK]] : tensor<32x64x16xf32> func.func @unpad_like_unpack_with_transpose(%arg0: tensor<32x1x16x64xf32>) -> tensor<32x64x16xf32> { %empty = tensor.empty() : tensor<32x64x16xf32> - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [64] into %empty : tensor<32x1x16x64xf32> -> tensor<32x64x16xf32> return %0 : tensor<32x64x16xf32> } diff --git a/mlir/test/Dialect/Linalg/td/decompose-pack.mlir b/mlir/test/Dialect/Linalg/td/decompose-pack.mlir index 49c45e29d5a14..32054134266c7 100644 --- a/mlir/test/Dialect/Linalg/td/decompose-pack.mlir +++ b/mlir/test/Dialect/Linalg/td/decompose-pack.mlir @@ -1,6 +1,6 @@ module @transforms attributes { transform.with_named_sequence } { transform.named_sequence @decompose_pack(%module: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op + %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %pack {isolated_from_above} : (!transform.any_op) -> !transform.any_op transform.apply_patterns to %1 { diff --git a/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir b/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir index 11243634262e0..f5b8403af5e58 100644 --- a/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir +++ b/mlir/test/Dialect/Linalg/td/decompose-unpack.mlir @@ -1,6 +1,6 @@ module @transforms attributes { transform.with_named_sequence } { transform.named_sequence @decompose_unpack(%module: !transform.any_op {transform.readonly}) { - %pack = 
transform.structured.match ops{["tensor.unpack"]} in %module : (!transform.any_op) -> !transform.any_op + %pack = transform.structured.match ops{["linalg.unpack"]} in %module : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %pack {isolated_from_above} : (!transform.any_op) -> !transform.any_op transform.apply_patterns to %1 { diff --git a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir index 5f8ff36a16578..81fd7a8a947d7 100644 --- a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir +++ b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir @@ -4,7 +4,7 @@ func.func @pack(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<17x2x16x16x32x8xf32>) -> tensor<17x2x16x16x32x8xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose + // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose // CHECK: tensor.pad {{.*}} low[0, 0, 0, 0] // CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32> // CHECK: tensor.expand_shape %{{.*}} [{{.*}}[0, 1], [2, 3], [4], [5]] @@ -13,16 +13,16 @@ func.func @pack(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<17x2x16x16x32x8xf // CHECK-SAME: ins(%{{.*}} : tensor<17x8x2x32x16x16xf32>) // CHECK-SAME: outs(%{{.*}} : tensor<17x2x16x16x32x8xf32>) // CHECK-SAME: permutation = [0, 2, 4, 5, 3, 1] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<129x47x16x16xf32> -> tensor<17x2x16x16x32x8xf32> return %pack : tensor<17x2x16x16x32x8xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -33,7 +33,7 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @pack( func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor<8x8x16x1xf32> { - // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose + // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose // CHECK: tensor.pad {{.*}} low[0, 0] // CHECK: : tensor<128x8xf32> to tensor<128x8xf32> // CHECK: tensor.expand_shape %{{.*}} [{{.*}}[0, 1], [2, 3]] @@ -43,7 +43,7 @@ func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor // CHECK-SAME: outs(%{{.*}} : tensor<8x8x16x1xf32>) // CHECK-SAME: permutation = [0, 2, 1, 3] - %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %arg1 + %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %arg1 : tensor<128x8xf32> -> tensor<8x8x16x1xf32> return %pack : tensor<8x8x16x1xf32> @@ -51,9 +51,9 @@ func.func @pack(%arg0: tensor<128x8xf32>, %arg1: tensor<8x8x16x1xf32>) -> tensor module attributes {transform.with_named_sequence} { transform.named_sequence 
@__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -67,7 +67,7 @@ module attributes {transform.with_named_sequence} { func.func @pack_as_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.insert_slice + // linalg.pack is lowered to tensor.pad + tensor.insert_slice // CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0, 0, 0] high[7, 17, 0, 0] // CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32> // CHECK: %[[RES:.*]] = tensor.insert_slice %[[PAD]] into %[[OUT]] @@ -79,16 +79,16 @@ func.func @pack_as_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x13 // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<1x1x1x1x136x64x16x16xf32> // CHECK: return %[[RES]] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<129x47x16x16xf32> -> tensor<1x1x1x1x136x64x16x16xf32> return %pack : tensor<1x1x1x1x136x64x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -101,22 +101,22 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @pack_as_pad_disabled_insert_slice( func.func @pack_as_pad_disabled_insert_slice(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose + // linalg.pack is lowered to tensor.pad + tensor.expand_shape + linalg.transpose // CHECK-SAME: %[[ARG0:[^:]*]]: tensor<129x47x16x16xf32> // CHECK-DAG: %[[PAD:.*]] = tensor.pad %[[ARG0]] // CHECK-NOT: %[[RES:.*]] = tensor.insert_slice %[[PAD]] // CHECK: %[[PAD_EXPANDED:.*]] = tensor.expand_shape %[[PAD]] // CHECK-DAG: %[[RES:.*]] = linalg.transpose ins(%[[PAD_EXPANDED]] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<129x47x16x16xf32> -> 
tensor<1x1x1x1x136x64x16x16xf32> return %pack : tensor<1x1x1x1x136x64x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}: (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}: (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -141,16 +141,16 @@ func.func @pack_not_a_pad(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x16x // CHECK-SAME: outs(%{{.*}} : tensor<1x1x16x16x136x64xf32>) // CHECK-SAME: permutation = [0, 2, 4, 5, 1, 3] - %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1] inner_tiles = [136, 64] into %arg1 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [0, 1] inner_tiles = [136, 64] into %arg1 : tensor<129x47x16x16xf32> -> tensor<1x1x16x16x136x64xf32> return %pack : tensor<1x1x16x16x136x64xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -172,16 +172,16 @@ func.func @unpack(%arg0: tensor<17x2x16x16x32x8xf32>, %arg1: tensor<129x47x16x16 // CHECK-SAME: : tensor<136x64x16x16xf32> to tensor<129x47x16x16xf32> // CHECK: linalg.copy ins(%[[SLICE]] : tensor<129x47x16x16xf32>) // CHECK-SAME: outs(%[[ARG1]] : tensor<129x47x16x16xf32>) - %unpack = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 + %unpack = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<17x2x16x16x32x8xf32> -> tensor<129x47x16x16xf32> return %unpack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -207,16 +207,16 @@ func.func @unpack_with_identity_outer_dims_perm(%arg0: tensor<17x2x16x16x32x8xf3 // CHECK-SAME: : tensor<136x64x16x16xf32> to tensor<129x47x16x16xf32> // CHECK: linalg.copy ins(%[[SLICE]] : tensor<129x47x16x16xf32>) // 
CHECK-SAME: outs(%[[ARG1]] : tensor<129x47x16x16xf32>) - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<17x2x16x16x32x8xf32> -> tensor<129x47x16x16xf32> return %unpack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -241,16 +241,16 @@ func.func @unpack_as_pad(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor< // strides multipliers. // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<1x1x1x1x136x64x16x16xf32> to tensor<129x47x16x16xf32> - %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<1x1x1x1x136x64x16x16xf32> -> tensor<129x47x16x16xf32> return %pack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -267,22 +267,22 @@ module attributes {transform.with_named_sequence} { func.func @unpack_as_pad_disabled_extract_slice(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor<129x47x16x16xf32>) -> tensor<129x47x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.unpack is lowered to tensor.extract_slice + linalg.transpose + tensor.collapse_shape + // linalg.unpack is lowered to linalg.transpose + tensor.collapse_shape + tensor.extract_slice // CHECK-DAG: %[[ARG0:[^:]*]]: tensor<1x1x1x1x136x64x16x16xf32> // CHECK-NOT: %[[RES:.*]] = tensor.extract_slice %[[ARG0]] // CHECK: %[[TRANSPOSED:.*]] = linalg.transpose ins(%[[ARG0]] // CHECK: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[TRANSPOSED]] // CHECK-DAG: %[[RES:.*]] = tensor.extract_slice %[[COLLAPSED]] - %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<1x1x1x1x136x64x16x16xf32> -> tensor<129x47x16x16xf32> return %pack : tensor<129x47x16x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { -
%unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false}: (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false}: (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -305,7 +305,7 @@ func.func @pack_with_outer_dims_perm(%src: tensor<100x200x128x256xi32>, // CHECK-SAME: ins(%{{.*}} : tensor<100x200x4x32x16x16xi32>) // CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>) // CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3] - %0 = tensor.pack %src + %0 = linalg.pack %src outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] inner_tiles = [16, 32] @@ -315,9 +315,9 @@ func.func @pack_with_outer_dims_perm(%src: tensor<100x200x128x256xi32>, module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -337,7 +337,7 @@ func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x16x16x1xf3 // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<265x16x16x1xf32>) // CHECK-SAME: permutation = [0, 2, 1, 3] %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.pack %src + %0 = linalg.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %dest @@ -347,9 +347,9 @@ func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x16x16x1xf3 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -370,7 +370,7 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, // CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>) // CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3] %cst_0 = arith.constant 0 : i32 - %0 = tensor.pack %src + %0 = linalg.pack %src padding_value(%cst_0 : i32) outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] @@ -381,9 +381,9 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - 
%pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -370,7 +370,7 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, // CHECK-SAME: outs(%{{.*}} : tensor<200x4x16x100x16x32xi32>) // CHECK-SAME: permutation = [1, 2, 4, 0, 5, 3] %cst_0 = arith.constant 0 : i32 - %0 = tensor.pack %src + %0 = linalg.pack %src padding_value(%cst_0 : i32) outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [3, 2] @@ -381,9 +381,9 @@ func.func @pack_with_pad_and_outer_dims_perm(%src: tensor<100x200x127x255xi32>, module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -429,7 +429,7 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims(%source: tensor - %pack = tensor.pack %source padding_value(%padding_value : f32) + %pack = linalg.pack %source padding_value(%padding_value : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<?x?xf32> -> tensor<?x?x16x32xf32> return %pack : tensor<?x?x16x32xf32> } @@ -437,9 +437,9 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims(%source: tensor module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -453,7 +453,7 @@ module attributes {transform.with_named_sequence} { func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %arg1: tensor<1x1x1x1x136x64x16x16xf32>) -> tensor<1x1x1x1x136x64x16x16xf32> { %cst_0 = arith.constant 0.0 : f32 - // tensor.pack is lowered to tensor.pad + tensor.insert_slice + // linalg.pack is lowered to tensor.pad + tensor.insert_slice // CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0, 0, 0] high[7, 17, 0, 0] // CHECK: : tensor<129x47x16x16xf32> to tensor<136x64x16x16xf32> // CHECK: %[[RES:.*]] = tensor.insert_slice %[[PAD]] into %[[OUT]] @@ -465,7 +465,7 @@ func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %ar // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<1x1x1x1x136x64x16x16xf32> // CHECK: return %[[RES]] - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 padding_value(%cst_0 : f32) outer_dims_perm = [1, 2, 3, 0] inner_dims_pos = [0, 1, 2, 3] @@ -476,9 +476,9 @@ func.func @pack_as_pad_with_outer_dims_perm(%arg0: tensor<129x47x16x16xf32>, %ar module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -501,7 +501,7 @@ module attributes {transform.with_named_sequence} { func.func @pack_as_pad_with_unit_dims(%arg0: tensor<3x1x1x1xf32>, %arg1: tensor< // CHECK-SAME: outs(%[[OUT]] : tensor<1x1x1x1x8x1xf32>) // CHECK-SAME: permutation = [0, 2, 4, 5, 1, 3] // CHECK: return %[[TRANSPOSED]] : tensor<1x1x1x1x8x1xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 padding_value(%zero : f32) inner_dims_pos = [0, 1] inner_tiles =
[8, 1] into %arg1 : tensor<3x1x1x1xf32> -> tensor<1x1x1x1x8x1xf32> @@ -512,9 +512,9 @@ func.func @pack_as_pad_with_unit_dims(%arg0: tensor<3x1x1x1xf32>, %arg1: tensor< module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.pack"> - transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">) + %pack = transform.structured.match ops{["linalg.pack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.pack"> + transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) transform.yield } @@ -541,16 +541,16 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] : tensor<32x?x?xf32>) // CHECK-SAME: outs(%[[ARG1]] : tensor<32x?x?xf32>) func.func @unpack_with_dynamic_dest(%arg0: tensor<32x2x49x16x16xf32>, %arg1: tensor<32x?x?xf32>) -> tensor<32x?x?xf32> { - %pack = tensor.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %arg1 : tensor<32x2x49x16x16xf32> -> tensor<32x?x?xf32> return %pack : tensor<32x?x?xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -582,15 +582,15 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] : tensor) // CHECK-SAME: outs(%[[ARG1]] : tensor) func.func @unpack_with_dynamic_input_dest(%arg0: tensor, %arg1: tensor) -> tensor { - %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %arg1 : tensor -> tensor + %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 16] into %arg1 : tensor -> tensor return %unpack : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -626,14 +626,14 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] : tensor) // CHECK-SAME: outs(%[[ARG1]] : tensor) func.func @unpack_fully_dynamic(%source: tensor, %dest: tensor, %tile_n : index, %tile_m : index) -> tensor { - %0 = 
tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor -> tensor + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor -> tensor return %0 : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -664,16 +664,16 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: [1, 1, 1, 1, 1, 1, 1, 1] // CHECK-SAME: : tensor<1x1x1x1x136x64x16x16xf32> to tensor func.func @unpack_as_pad_dynamic(%arg0: tensor<1x1x1x1x136x64x16x16xf32>, %arg1: tensor) -> tensor { - %pack = tensor.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 + %pack = linalg.unpack %arg0 inner_dims_pos = [0, 1, 2, 3] inner_tiles = [136, 64, 16, 16] into %arg1 : tensor<1x1x1x1x136x64x16x16xf32> -> tensor return %pack : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -698,16 +698,16 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.copy ins(%[[SLICE]] // CHECK-SAME: : tensor<32x64xf32>) outs(%[[ARG0]] : tensor<32x64xf32>) -> tensor<32x64xf32> func.func @unpack_with_outer_dims_perm(%arg0: tensor<32x64xf32>, %arg1: tensor<2x4x32x8xf32>) -> tensor<32x64xf32> { - %unpack = tensor.unpack %arg1 outer_dims_perm = [1, 0] + %unpack = linalg.unpack %arg1 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg0 : tensor<2x4x32x8xf32> -> tensor<32x64xf32> return %unpack : tensor<32x64xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) { - %unpack = transform.structured.match ops{["tensor.unpack"]} in %module_op - : (!transform.any_op) -> !transform.op<"tensor.unpack"> - transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">) + %unpack = transform.structured.match ops{["linalg.unpack"]} in %module_op + : (!transform.any_op) -> !transform.op<"linalg.unpack"> + transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir index 
ac1ca9319d335..20019424e8d3c 100644 --- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir @@ -106,12 +106,12 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @unpack_elemwise // CHECK: %[[RES:.*]] = scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.elemwise_unary // CHECK: return %[[RES]] func.func @unpack_elemwise(%arg0: tensor<16x48x8x8xf32>, %arg1: tensor<128x384xf32>) -> tensor<128x384xf32> { %0 = tensor.empty() : tensor<128x384xf32> - %1 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 + %1 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<16x48x8x8xf32> -> tensor<128x384xf32> %2 = linalg.elemwise_unary ins(%1: tensor<128x384xf32>) outs(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32> @@ -132,12 +132,12 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @pack_elemwise // CHECK: %[[RES:.*]] = scf.for // CHECK: scf.for -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: linalg.elemwise_unary // CHECK: return %[[RES]] func.func @pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> { %0 = tensor.empty() : tensor<16x48x8x8xf32> - %1 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 + %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<128x384xf32> -> tensor<16x48x8x8xf32> %2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>) outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> @@ -156,14 +156,14 @@ module attributes {transform.with_named_sequence} { // ----- // CHECK-LABEL: func.func @nofuse_pack_elemwise -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: %[[RES:.*]] = scf.for // CHECK: scf.for // CHECK: linalg.elemwise_unary // CHECK: return %[[RES]] func.func @nofuse_pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> { %0 = tensor.empty() : tensor<16x48x8x8xf32> - %1 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 + %1 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<128x384xf32> -> tensor<16x48x8x8xf32> %2 = linalg.elemwise_unary ins(%1: tensor<16x48x8x8xf32>) outs(%arg1: tensor<16x48x8x8xf32>) -> tensor<16x48x8x8xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-pack.mlir b/mlir/test/Dialect/Linalg/transform-op-pack.mlir index 6c26ebd0a5b84..b3ad73e8df8e7 100644 --- a/mlir/test/Dialect/Linalg/transform-op-pack.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-pack.mlir @@ -15,9 +15,9 @@ // CHECK-SAME: %[[T1:.+]]: tensor<3xf16> func.func @reduction_2d_static(%t0: tensor<3x7xf16>, %t1: tensor<3xf16>) -> tensor<3xf16> { // CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<3x2x4xf16> - // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] : tensor<3x7xf16> -> tensor<3x2x4xf16> - // CHECK-NOT: tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"] @@ -29,7 +29,7 @@ func.func @reduction_2d_static(%t0: tensor<3x7xf16>, %t1: tensor<3xf16>) -> tens linalg.yield %3 : f16 } -> tensor<3xf16> - // CHECK-NOT: tensor.unpack + // CHECK-NOT: 
linalg.unpack return %2 : tensor<3xf16> } @@ -59,9 +59,9 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[T1:.+]]: tensor<3xf16> func.func @col_reduction_2d_static(%t0: tensor<7x3xf16>, %t1: tensor<3xf16>) -> tensor<3xf16> { // CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<3x2x4xf16> - // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [4] into %[[EMPTY]] : tensor<7x3xf16> -> tensor<3x2x4xf16> - // CHECK-NOT: tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["reduction", "parallel", "reduction"] @@ -73,7 +73,7 @@ func.func @col_reduction_2d_static(%t0: tensor<7x3xf16>, %t1: tensor<3xf16>) -> linalg.yield %3 : f16 } -> tensor<3xf16> - // CHECK-NOT: tensor.unpack + // CHECK-NOT: linalg.unpack return %2 : tensor<3xf16> } @@ -83,12 +83,12 @@ module attributes {transform.with_named_sequence} { %1 = transform.structured.pack %0 packed_sizes = [4, 0] : (!transform.any_op) -> (!transform.op<"linalg.generic">) %pack = transform.get_producer_of_operand %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">) %2, %pack_2, %empty_unpack_2 = transform.structured.pack_transpose %pack with_compute_op(%1) outer_perm = [1, 0] - : (!transform.op<"tensor.pack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op) + : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op) transform.yield } } @@ -116,9 +116,9 @@ func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> ten // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[T0]], %[[C1]] : tensor // CHECK: %[[D1B4:.*]] = affine.apply #[[$DIV4]]()[%[[D1]]] // CHECK: %[[EMPTY:.*]] = tensor.empty(%[[D0]], %[[D1B4]]) : tensor - // CHECK: %[[PACKED:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [1] inner_tiles = [4] into %[[EMPTY]] : tensor -> tensor - // CHECK-NOT: tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"] @@ -130,7 +130,7 @@ func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> ten linalg.yield %3 : f16 } -> tensor - // CHECK-NOT: tensor.unpack + // CHECK-NOT: linalg.unpack return %2 : tensor } @@ -162,11 +162,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[T0:.+]]: tensor, // CHECK-SAME: %[[T1:.+]]: tensor func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> tensor { - // CHECK: %[[PACKED_0:.*]] = tensor.pack %[[T0]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED_0:.*]] = linalg.pack %[[T0]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 4] into %{{.*}} : tensor -> tensor - // CHECK: %[[PACKED_1:.*]] = tensor.pack %[[T1]] padding_value(%{{.*}} : f16) + // CHECK: %[[PACKED_1:.*]] = linalg.pack %[[T1]] padding_value(%{{.*}} : f16) // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor -> tensor - // CHECK-NOT: 
tensor.pack + // CHECK-NOT: linalg.pack // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel", "reduction"] @@ -178,7 +178,7 @@ func.func @reduction_2d_dynamic(%t0: tensor, %t1: tensor) -> ten linalg.yield %3 : f16 } -> tensor - // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor -> tensor + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [0] inner_tiles = [3] into %{{.*}} : tensor -> tensor return %2 : tensor } @@ -207,11 +207,11 @@ module attributes {transform.with_named_sequence} { func.func @matmul(%A: tensor, %B: tensor, %C: tensor) -> tensor { - // CHECK: %[[PACK_A:.*]] = tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [2, 4] + // CHECK: %[[PACK_A:.*]] = linalg.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [2, 4] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_B:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [3, 4] + // CHECK: %[[PACK_B:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1, 0] inner_tiles = [3, 4] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_C:.*]] = tensor.pack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] + // CHECK: %[[PACK_C:.*]] = linalg.pack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] // CHECK-SAME: : tensor -> tensor // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] @@ -222,7 +222,7 @@ func.func @matmul(%A: tensor, %B: tensor, %C: tensor) outs(%C: tensor) -> tensor - // CHECK: tensor.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] + // CHECK: linalg.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2] // CHECK-SAME: : tensor -> tensor return %0 : tensor } @@ -235,12 +235,12 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> (!transform.op<"linalg.generic">) %unpack = transform.get_consumers_of_result %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">) %2, %pack_2, %unpack_2 = transform.structured.pack_transpose %unpack with_compute_op(%1) outer_perm = [1, 0] inner_perm = [1, 0] - : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">) transform.yield } } @@ -259,11 +259,11 @@ module attributes {transform.with_named_sequence} { func.func @conv_2d_nchw_fchw(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1x1xf32>, %o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32> { - // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [8] + // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [8] // CHECK-SAME: : tensor<14x512x28x28xf32> -> tensor<14x64x28x28x8xf32> - // CHECK: %[[PACK_FILTER:.*]] = tensor.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] + // CHECK: %[[PACK_FILTER:.*]] = linalg.pack %{{.*}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] // CHECK-SAME: : tensor<1024x512x1x1xf32> -> tensor<256x64x1x1x4x8xf32> - // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [1] inner_tiles = 
[4] + // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [1] inner_tiles = [4] // CHECK-SAME: : tensor<14x1024x28x28xf32> -> tensor<14x256x28x28x4xf32> // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "reduction"]} @@ -272,7 +272,7 @@ func.func @conv_2d_nchw_fchw(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1 %0 = linalg.conv_2d_nchw_fchw ins(%i, %f: tensor<14x512x28x28xf32>, tensor<1024x512x1x1xf32>) outs(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32> - // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4] + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4] // CHECK-SAME: : tensor<14x256x28x28x4xf32> -> tensor<14x1024x28x28xf32> return %0: tensor<14x1024x28x28xf32> } @@ -300,11 +300,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[INIT:.+]]: tensor func.func @conv_2d_nhwc_hwcf(%input: tensor, %filter: tensor<1x?x?x?xf32>, %init: tensor) -> tensor { - // CHECK: %[[PACK_INPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [6] + // CHECK: %[[PACK_INPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [6] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_FILTER:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3, 2] inner_tiles = [4, 6] + // CHECK: %[[PACK_FILTER:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3, 2] inner_tiles = [4, 6] // CHECK-SAME: : tensor<1x?x?x?xf32> -> tensor<1x?x?x?x4x6xf32> - // CHECK: %[[PACK_OUTPUT:.*]] = tensor.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] + // CHECK: %[[PACK_OUTPUT:.*]] = linalg.pack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] // CHECK-SAME: : tensor -> tensor // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] @@ -315,7 +315,7 @@ func.func @conv_2d_nhwc_hwcf(%input: tensor, %filter: tensor<1x?x?x ins (%input, %filter: tensor, tensor<1x?x?x?xf32>) outs (%init: tensor) -> tensor - // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [3] inner_tiles = [4] // CHECK-SAME: : tensor -> tensor return %0 : tensor } @@ -349,11 +349,11 @@ func.func @matmul_dynamic_pack_size(%A: tensor, %B: tensor, %C // CHECK: %[[TS:.*]] = "some_tile_size"() : () -> index %sz = "some_tile_size"() : () -> (index) - // CHECK: %[[PACK_A:.*]] = tensor.pack %[[A]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] + // CHECK: %[[PACK_A:.*]] = linalg.pack %[[A]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_B:.*]] = tensor.pack %[[B]] {{.*}} inner_dims_pos = [1, 0] inner_tiles = [%[[TS]], %[[TS]]] + // CHECK: %[[PACK_B:.*]] = linalg.pack %[[B]] {{.*}} inner_dims_pos = [1, 0] inner_tiles = [%[[TS]], %[[TS]]] // CHECK-SAME: : tensor -> tensor - // CHECK: %[[PACK_C:.*]] = tensor.pack %[[C]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] + // CHECK: %[[PACK_C:.*]] = linalg.pack %[[C]] {{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] // CHECK-SAME: : tensor -> tensor // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "reduction"]} @@ -363,7 +363,7 @@ func.func @matmul_dynamic_pack_size(%A: tensor, %B: tensor, %C outs(%C: tensor) -> tensor - // CHECK: 
tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]] + // CHECK: linalg.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]] // CHECK-SAME: : tensor -> tensor return %0 : tensor } @@ -445,16 +445,16 @@ module attributes {transform.with_named_sequence} { // ----- func.func @no_single_packing_op(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> - %1 = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> - %2 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %1 = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> + %2 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> return } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{requires target to map to exactly 1 packing op and 1 packed op (got 2 and 1)}} transform.structured.pack_transpose %0 with_compute_op(%1) inner_perm = [0] @@ -476,7 +476,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!transform.any_op) -> !transform.any_op - // expected-error @below {{requires target to map to a tensor.pack or tensor.unpack}} + // expected-error @below {{requires target to map to a linalg.pack or linalg.unpack}} transform.structured.pack_transpose %0 with_compute_op(%1) inner_perm = [0] : (!transform.any_op, !transform.any_op) @@ -488,14 +488,14 @@ module attributes {transform.with_named_sequence} { // ----- func.func @no_linalg_target(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> %1 = arith.constant 0 : index return } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["arith.constant"]} in %arg1 : 
(!transform.any_op) -> !transform.any_op // expected-error @below {{requires a LinalgOp target}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -509,7 +509,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @no_single_use_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> %f0 = arith.constant 0.0 : f32 %1 = tensor.empty() : tensor %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor @@ -518,7 +518,7 @@ func.func @no_single_use_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4 module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{not a single use by the LinalgOp target}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -532,8 +532,8 @@ module attributes {transform.with_named_sequence} { // ----- func.func @not_produced_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x16x32x16xf32>) { - %a = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> - %b = tensor.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> + %a = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> + %b = linalg.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> %f0 = arith.constant 0.0 : f32 %1 = tensor.empty() : tensor %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor @@ -542,7 +542,7 @@ func.func @not_produced_by_linalg(%source: tensor<128x256xf32>, %dest: tensor<4x module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{not produced by the LinalgOp target}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -559,13 +559,13 @@ func.func @no_matching_pack(%source: tensor<16xf32>) { %f0 = arith.constant 0.0 : f32 %1 = tensor.empty() : tensor<4x4xf32> %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor<4x4xf32>) -> tensor<4x4xf32> - %b = tensor.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32> + %b = linalg.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32> return } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = 
transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{could not find matching pack op}} transform.structured.pack_transpose %0 with_compute_op(%1) @@ -593,13 +593,13 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> (!transform.op<"linalg.generic">) %unpack = transform.get_consumers_of_result %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">) %2, %pack_2, %unpack_2 = // expected-error @below {{invalid outer_perm}} transform.structured.pack_transpose %unpack with_compute_op(%1) outer_perm = [1] - : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">) transform.yield } } @@ -621,13 +621,13 @@ module attributes {transform.with_named_sequence} { : (!transform.any_op) -> (!transform.op<"linalg.generic">) %unpack = transform.get_consumers_of_result %1[0] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.unpack">) %2, %pack_2, %unpack_2 = // expected-error @below {{invalid inner_perm}} transform.structured.pack_transpose %unpack with_compute_op(%1) inner_perm = [1] - : (!transform.op<"tensor.unpack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.op<"linalg.unpack">) transform.yield } } @@ -643,12 +643,12 @@ func.func @no_padding_on_packs(%A: tensor<32x32xf32>, %B: tensor<32x32xf32>, %C: } // CHECK-LABEL: no_padding_on_packs -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<8x4x4x8xf32> -// CHECK: tensor.pack %{{.+}} outer_dims_perm = [1, 0] +// CHECK: linalg.pack %{{.+}} outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 8] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x4x8x8xf32> -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [4, 8] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<8x4x4x8xf32> module attributes {transform.with_named_sequence} { @@ -657,12 +657,12 @@ module attributes {transform.with_named_sequence} { %1 = transform.structured.pack %0 packed_sizes = [4, 8, 8] : (!transform.any_op) -> (!transform.op<"linalg.generic">) %pack = transform.get_producer_of_operand %1[1] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">) %2, %pack_2, %empty_unpack_2 = transform.structured.pack_transpose %pack with_compute_op(%1) outer_perm = [1, 0] inner_perm = [1, 0] - : 
(!transform.op<"tensor.pack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op) + : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op) transform.yield } } diff --git a/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir b/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir new file mode 100644 index 0000000000000..456a5ea453963 --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-op-tile-pack-unpack.mlir @@ -0,0 +1,491 @@ +// RUN: mlir-opt %s -transform-interpreter -canonicalize -cse -split-input-file | FileCheck %s + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> +// CHECK: func.func @NC_to_NCnc +// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>, +// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) { +// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) { +// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [64, 128] [1, 1] : tensor<128x256xf32> to tensor<64x128xf32> +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32> +// CHECK: %[[SUB_RES:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32> +// CHECK: } +// CHECK: scf.yield %[[RES1:.*]] : tensor<4x8x32x32xf32> +// CHECK: } +// CHECK: return %[[RES0:.*]] : tensor<4x8x32x32xf32> +// CHECK: } +func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { + %0 = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32> + return %0 : tensor<4x8x32x32xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)> +// CHECK: func.func @KC_to_CKkc +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index +// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index +// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]] +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) +// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK-SAME: [0, %[[IN_C]]] 
[128, 16] +// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8] +// CHECK: linalg.pack +// CHECK-SAME: %[[INPUT_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] +// CHECK-SAME: into %[[OUTPUT_SLICE]] +func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> + return %0 : tensor<32x4x32x8xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)> +// CHECK: func.func @pad_and_pack_static( +// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>, +// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>, +// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) { +// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]]) +// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]]) +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1] +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1] +// CHECK: %[[SUB_RES:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] +// CHECK-SAME: into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32> +// CHECK: } +// CHECK: return %[[RES0:.*]] : tensor<2x8x8x2xf32> +// CHECK: } +func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> { + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32> + return %0 : tensor<2x8x8x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)> +// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)> +// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)> +// 
CHECK: func.func @pad_and_pack_partially_dynamic( +// CHECK-SAME: %[[IN:.*]]: tensor, +// CHECK-SAME: %[[OUT:.*]]: tensor, +// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor +// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor +// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor) { +// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor) { +// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] +// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] +// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]]) +// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]] +// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]]) +// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]] +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor to tensor +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor to tensor +// CHECK: %[[SUB_RES:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] +// CHECK-SAME: into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor +// CHECK: } +// CHECK: scf.yield %[[RES1:.*]] : tensor +// CHECK: } +// CHECK: return %[[VAL_34:.*]] : tensor +// CHECK: } +func.func @pad_and_pack_partially_dynamic(%input: tensor, %output: tensor, %pad: f32) -> tensor { + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor -> tensor + return %0 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)> +// CHECK: func.func @pad_and_pack_fully_dynamic( +// CHECK-SAME: %[[IN:.*]]: tensor, +// CHECK-SAME: %[[OUT:.*]]: tensor, +// CHECK-SAME: %[[PAD:.*]]: f32, +// CHECK-SAME: %[[TILE_0:.*]]: index, +// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor { +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor +// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor +// CHECK: %[[RES0:.*]] = scf.for 
%[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor) { +// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor) { +// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] +// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] +// CHECK-DAG: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]] +// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]] +// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]] +// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]] +// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor to tensor +// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[ITER1]], %[[C2]] +// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[ITER1]], %[[C3]] +// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor to tensor +// CHECK: %[[PACK:.*]] = linalg.pack +// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]] +// CHECK-SAME: into %[[SUB_OUT]] +// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]] +// CHECK: scf.yield %[[INSERT]] : tensor +// CHECK: } +// CHECK: scf.yield %[[RES1:.*]] : tensor +// CHECK: } +// CHECK: return %[[RES0:.*]] : tensor +// CHECK: } +func.func @pad_and_pack_fully_dynamic(%source: tensor, %dest: tensor, %pad: f32, %tile_n : index, %tile_m : index) -> tensor { + %0 = linalg.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor -> tensor + return %0 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> +// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)> +// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)> +// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)> +// CHECK: func.func @NCnc_to_NC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index +// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index +// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]] +// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]]) +// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]]) +// 
CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]]) +// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]]) +// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]]) +// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]]) +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16] +// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor +// CHECK: %[[EMPTY:.+]] = tensor.empty +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] +// CHECK-SAME: into %[[EMPTY]] +// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] +// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] +// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4] +// CHECK: scf.yield %[[RES]] +func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + return %0 : tensor<256x128xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> +// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 8)> +// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 8)> +// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 8 - d0 floordiv 8 + 1)> +// CHECK: func.func @CKkc_to_KC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index +// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index +// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C128]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C256]] step %[[C4]] +// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) +// CHECK-DAG: %[[OFFSET_K:.+]] = affine.apply #[[MAP1]](%[[K]]) +// CHECK-DAG: %[[IN_K_SZ:.+]] = affine.apply #[[MAP2]](%[[K]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP4]](%[[C]]) +// CHECK-DAG: %[[OFFSET_C:.+]] = affine.apply #[[MAP5]](%[[C]]) +// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP6]](%[[C]]) +// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], %[[IN_K_SZ]], 32, 8] +// CHECK: %[[EMPTY:.+]] = tensor.empty +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] +// CHECK-SAME: into %[[EMPTY]] +// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] +// CHECK-SAME: [%[[OFFSET_K]], %[[OFFSET_C]]] [2, 4] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] +// CHECK-SAME: into %{{.+}}[%[[K]], 
%[[C]]] [2, 4] +// CHECK: scf.yield %[[RES]] +func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>) -> tensor<128x256xf32> { + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %dest : tensor<32x4x32x8xf32> -> tensor<128x256xf32> + return %0 : tensor<128x256xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 4)> +// CHECK: func.func @perfect_CKkc_to_KC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index +// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C128]] step %[[C4]] +// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP1]](%[[C]]) +// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 2, 4] +// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [2, 4] +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] +// CHECK-SAME: into %[[ITER_SLICE]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] +// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4] +// CHECK: scf.yield %[[RES]] +func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128xf32>) -> tensor<8x128xf32> { + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %dest : tensor<32x4x2x4xf32> -> tensor<8x128xf32> + return %0 : tensor<8x128xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 floordiv 2)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 ceildiv 2)> +// CHECK: func.func @dynamic_perfect_CKkc_to_KC +// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: +// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[DIM_0:.+]] = tensor.dim %[[OUT]], %[[C0]] +// CHECK-DAG: %[[DIM_1:.+]] = tensor.dim %[[OUT]], %[[C1]] +// CHECK: %{{.+}} = 
scf.for %[[K:.+]] = %[[C0]] to %[[DIM_0]] step %[[C2]] +// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[DIM_1]] step %[[C4]] +// CHECK-DAG: %[[OUT_K_SZ:.+]] = affine.min #[[MAP0]](%[[K]])[%[[DIM_0]]] +// CHECK-DAG: %[[OUT_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])[%[[DIM_1]]] +// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP2]](%[[K]]) +// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]]) +// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP3]](%[[OUT_C_SZ]]) +// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] +// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], 1, 2, 2] +// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] +// CHECK-SAME: into %[[ITER_SLICE]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] +// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] +// CHECK: scf.yield %[[RES]] + +func.func @dynamic_perfect_CKkc_to_KC(%source: tensor, %dest: tensor) -> tensor { + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %dest : tensor -> tensor + return %0 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)> +// CHECK: func.func @perfect_NKPQk_to_NPQK( +// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x4x6x6x2xf32>, +// CHECK-SAME: %{{.+}}: tensor<1x6x6x8xf32>) +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK: %{{.+}} = scf.for %[[P:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[Q:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C4]] +// CHECK: %[[K_SZ:.+]] = affine.apply #[[MAP]](%[[K]]) +// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[K_SZ]], %[[P]], %[[Q]], 0] +// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack +// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] +// CHECK-SAME: into %[[SLICE_DEST]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] +// CHECK-SAME: into %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] +// CHECK: scf.yield %[[RES]] + +func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1x6x6x8xf32>) -> tensor<1x6x6x8xf32> { + %0 = linalg.unpack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x4x6x6x2xf32> -> tensor<1x6x6x8xf32> + return %0 : tensor<1x6x6x8xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> 
!transform.any_op + %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +func.func private @get_dynamic_tile_size() -> index + +// CHECK-LABEL: func.func @fully_dynamic_unpack +// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] +// CHECK-SAME: %[[DST:[0-9a-zA-Z]+]] +// CHECK: %[[INNER_TS:.+]] = call @get_dynamic_tile_size() : () -> index +// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[DST]]) +// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SRC]] +// CHECK: %[[EMPTY:.+]] = tensor.empty +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SLICE]] +// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [%[[INNER_TS]], %[[INNER_TS]]] into %[[EMPTY]] +func.func @fully_dynamic_unpack(%source: tensor, %dest: tensor) -> tensor { + %0 = func.call @get_dynamic_tile_size() : () -> index + %1 = linalg.unpack %source inner_dims_pos = [1, 0] inner_tiles = [%0, %0] into %dest : tensor -> tensor + return %1 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} + +// ----- + +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * 2)> +// CHECK: func.func @perfect_NPQK_to_NKPQk +// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x6x6x8xf32>, +// CHECK-SAME: %{{.+}}: tensor<1x4x6x6x2xf32>) +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index +// CHECK: %{{.+}} = scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[ARG4:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %{{.+}} = scf.for %[[ARG6:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP1]](%[[ARG2]]) +// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[ARG4]], %[[ARG6]], %[[APPLY]]] +// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] +// CHECK: %[[PACK:.+]] = linalg.pack +// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] +// CHECK-SAME: into %[[SLICE_DEST]] +// CHECK: %[[RES:.+]] = tensor.insert_slice %[[PACK]] +// CHECK-SAME: into %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] +// CHECK: scf.yield %[[RES]] + +func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4x6x6x2xf32>) -> tensor<1x4x6x6x2xf32> { + %0 = linalg.pack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x6x6x8xf32> -> tensor<1x4x6x6x2xf32> + return %0 : tensor<1x4x6x6x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes 
[1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir index 100692426ef44..5812c4db88247 100644 --- a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir +++ b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir @@ -378,11 +378,11 @@ func.func @no_padding_on_packs(%A: tensor<32x32xf32>, %B: tensor<32x32xf32>, %C: } // CHECK-LABEL: no_padding_on_packs -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 4] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 4] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x8x8x4xf32> -// CHECK: tensor.pack %{{.+}} outer_dims_perm = [1, 0] +// CHECK: linalg.pack %{{.+}} outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 16] into %{{.+}} : tensor<32x32xf32> -> tensor<2x8x4x16xf32> -// CHECK: tensor.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 16] +// CHECK: linalg.pack %{{.+}} inner_dims_pos = [0, 1] inner_tiles = [8, 16] // CHECK-SAME: into %{{.+}} : tensor<32x32xf32> -> tensor<4x2x8x16xf32> module attributes {transform.with_named_sequence} { @@ -393,12 +393,12 @@ module attributes {transform.with_named_sequence} { matmul_packed_sizes = [8, 16, 4] matmul_inner_dims_order = [0, 1, 2] : (!transform.op<"linalg.matmul">) -> !transform.op<"linalg.generic"> %pack = transform.get_producer_of_operand %1[1] - : (!transform.op<"linalg.generic">) -> (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.generic">) -> (!transform.op<"linalg.pack">) %2, %pack_2, %empty_unpack_2 = transform.structured.pack_transpose %pack with_compute_op(%1) outer_perm = [1, 0] inner_perm = [1, 0] - : (!transform.op<"tensor.pack">, !transform.op<"linalg.generic">) - -> (!transform.op<"linalg.generic">, !transform.op<"tensor.pack">, !transform.any_op) + : (!transform.op<"linalg.pack">, !transform.op<"linalg.generic">) + -> (!transform.op<"linalg.generic">, !transform.op<"linalg.pack">, !transform.any_op) transform.yield } } diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir index faf7ff9ad7ed0..5d4ae4f15d3fd 100644 --- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse-pack-unpack.mlir @@ -14,7 +14,7 @@ module { func.func @fuse_pack_as_producer(%src: tensor<128x256xf32>, %other: tensor<4x4x128x256xf32>) -> tensor<4x4x128x256xf32> { %dest = tensor.empty() : tensor<1x1x128x256xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<128x256xf32> -> tensor<1x1x128x256xf32> %out = tensor.empty() : tensor<4x4x128x256xf32> @@ -36,10 +36,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower pack operation. 
- %pack = transform.structured.match ops{["tensor.pack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.pack"> + %pack = transform.structured.match ops{["linalg.pack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.pack"> %paded, %expanded, %transpose = transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false} - : (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) @@ -72,7 +72,7 @@ module { func.func @fuse_pack_as_producer_blocked_by_insert_slice(%src: tensor<128x256xf32>, %other: tensor<4x4x128x256xf32>) -> tensor<4x4x128x256xf32> { %dest = tensor.empty() : tensor<1x1x128x256xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<128x256xf32> -> tensor<1x1x128x256xf32> %out = tensor.empty() : tensor<4x4x128x256xf32> @@ -94,10 +94,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower pack operation. - %pack = transform.structured.match ops{["tensor.pack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.pack"> + %pack = transform.structured.match ops{["linalg.pack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.pack"> %paded, %expanded, %transpose = transform.structured.lower_pack %pack - : (!transform.op<"tensor.pack">) + : (!transform.op<"linalg.pack">) -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">, !transform.op<"linalg.transpose">) @@ -143,7 +143,7 @@ module { } -> tensor<1x1x128x256xf32> %dest = tensor.empty() : tensor<128x256xf32> - %unpack = tensor.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %unpack = linalg.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<1x1x128x256xf32> -> tensor<128x256xf32> return %unpack : tensor<128x256xf32> @@ -152,10 +152,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower unpack operation. - %unpack = transform.structured.match ops{["tensor.unpack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.unpack"> + %unpack = transform.structured.match ops{["linalg.unpack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.unpack"> transform.structured.lower_unpack %unpack {lowerUnpadLikeWithExtractSlice = false} - : (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, @@ -204,7 +204,7 @@ module { } -> tensor<1x1x128x256xf32> %dest = tensor.empty() : tensor<128x256xf32> - %unpack = tensor.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] + %unpack = linalg.unpack %res inner_dims_pos = [0, 1] inner_tiles = [128, 256] into %dest : tensor<1x1x128x256xf32> -> tensor<128x256xf32> return %unpack : tensor<128x256xf32> @@ -213,10 +213,10 @@ module { module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { // Find and lower unpack operation. 
- %unpack = transform.structured.match ops{["tensor.unpack"]} in %arg1 - : (!transform.any_op) -> !transform.op<"tensor.unpack"> + %unpack = transform.structured.match ops{["linalg.unpack"]} in %arg1 + : (!transform.any_op) -> !transform.op<"linalg.unpack"> transform.structured.lower_unpack %unpack - : (!transform.op<"tensor.unpack">) + : (!transform.op<"linalg.unpack">) -> (!transform.op<"tensor.empty">, !transform.op<"linalg.transpose">, !transform.op<"tensor.collapse_shape">, diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir index 8fbc74ec345c6..8f3b199145ce0 100644 --- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir @@ -115,13 +115,13 @@ module attributes {transform.with_named_sequence} { func.func @test_pack_no_vectorize_dynamic_shape(%arg0: tensor, %arg1: tensor<4x16xf32>) -> tensor<4x16xf32> { %pad = arith.constant 0.000000e+00 : f32 // expected-error @+1 {{Attempted to vectorize, but failed}} - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor -> tensor<4x16xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [0] inner_tiles = [16] into %arg1 : tensor -> tensor<4x16xf32> return %pack : tensor<4x16xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir index 5ae3f893c2e73..9f2ee47b45b3e 100644 --- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir @@ -1944,13 +1944,13 @@ module attributes {transform.with_named_sequence} { // masking was used. 
func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { - %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> return %pack : tensor<4x1x32x16x2xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op transform.yield @@ -1977,7 +1977,7 @@ module attributes {transform.with_named_sequence} { func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { %pad = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> return %pack : tensor<32x4x1x16x2xf32> } @@ -1995,7 +1995,7 @@ func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor< module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op transform.yield diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 6d39262945de5..c6d9ec6215715 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -671,7 +671,7 @@ module attributes {transform.with_named_sequence} { // masking was used. 
func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { - %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> return %pack : tensor<4x1x32x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -688,7 +688,7 @@ func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [4, 1, 32] : !transform.any_op transform.yield } @@ -702,7 +702,7 @@ module attributes {transform.with_named_sequence} { func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { %pad = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> return %pack : tensor<32x4x1x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -725,7 +725,7 @@ func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor< module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [32, 4, 1] : !transform.any_op transform.yield } @@ -734,7 +734,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @test_vectorize_dynamic_pack(%arg0: tensor, %arg1: tensor) -> tensor { - %pack = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg1 : tensor -> tensor + %pack = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg1 : tensor -> tensor return %pack : tensor } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -766,7 +766,7 @@ func.func @test_vectorize_dynamic_pack(%arg0: tensor, %arg1: tensor !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [4, 1] : !transform.any_op transform.yield } @@ -893,12 +893,12 @@ func.func @test_vectorize_dynamic_shapes_unpack(%arg0: tensor, %arg1: t // CHECK: %[[writeMsk0:.*]] = vector.create_mask {{.*}} : vector<4x16xi1> // CHECK: %[[write0:.*]] = vector.mask %[[writeMsk0:.*]] {{.*}} vector.transfer_write %[[sc0]], %[[empt0]] // CHECK: return %[[write0]] - %ret = tensor.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor -> tensor + %ret = linalg.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles 
= [16, 2] into %arg0 : tensor -> tensor return %ret : tensor } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [4, 16] : !transform.any_op transform.yield } @@ -925,12 +925,12 @@ func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<2 // CHECK: %[[WRITEMSK:.*]] = vector.create_mask %[[C256]], %[[C128]] : vector<512x128xi1> // CHECK: %[[WRIT:.*]] = vector.mask %[[WRITEMSK]] {{.*}} : vector<512x128xi1> -> tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [512, 128] : !transform.any_op transform.yield } @@ -949,12 +949,12 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: // CHECK: %[[C00:.*]] = arith.constant 0 : index // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op transform.yield } @@ -973,12 +973,12 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: // CHECK: %[[C00:.*]] = arith.constant 0 : index // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} 
in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op transform.yield } @@ -988,7 +988,7 @@ func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: // CHECK-LABEL: test_vectorize_pack_no_vector_sizes func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> { - %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32> + %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32> return %pack : tensor<2x4x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -1005,7 +1005,7 @@ func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: t module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1016,7 +1016,7 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { %pad = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> return %pack : tensor<32x4x1x16x2xf32> } // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32 @@ -1033,7 +1033,7 @@ func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1051,12 +1051,12 @@ func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>, // CHECK: %[[C00:.*]] = arith.constant 0 : index // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32> // CHECK: return %[[WRIT]] : tensor<256x128xf32> - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> + %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match 
ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1075,12 +1075,12 @@ func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[EMPT]]{{\[}}%[[C00]], %[[C00]]] // CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32> // CHECK: return %[[WRIT]] : tensor<64x127xf32> - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32> + %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32> return %0 : tensor<64x127xf32> } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } @@ -1089,7 +1089,7 @@ func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x // ----- func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf32>, %dest: tensor<7x16xf32>) -> tensor<7x16xf32> { - %0 = tensor.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32> + %0 = linalg.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32> return %0 : tensor<7x16xf32> } // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 @@ -1103,7 +1103,7 @@ func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf // CHECK: return %[[WRIT]] : tensor<7x16xf32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op transform.structured.vectorize %0 : !transform.any_op transform.yield } diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 01d14871072cd..90cc0ca658ffb 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -899,225 +899,6 @@ func.func @fold_extract_constant_splat() -> (tensor<4x4xi32>) { // ----- -// CHECK-LABEL: func @fold_pack_constant_splat -// CHECK-NOT: tensor.pack -// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> -func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { - %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32> - %0 = tensor.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> -} - -// ----- - -// CHECK-LABEL: func @fold_padding_value_pack_constant_splat -// CHECK-NOT: tensor.pack -// CHECK: arith.constant dense<1.000000e-01> : 
tensor<8x16x8x32xf32> -func.func @fold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { - %pad = arith.constant 1.000000e-01 : f32 - %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> - %0 = tensor.pack %cst - padding_value(%pad : f32) - outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> -} - - -// ----- - -// CHECK-LABEL: func @nofold_padding_value_pack_constant_splat -// CHECK: arith.constant dense<1.000000e-01> : tensor<63x127xf32> -// CHECK: tensor.pack -func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { - %pad = arith.constant 0.0 : f32 - %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> - %0 = tensor.pack %cst - padding_value(%pad : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [0, 1] - inner_tiles = [8, 32] - into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> -} - -// ----- - -func.func @fold_padding_value_pack(%arg0: tensor<1200x500000xf32>) -> tensor<31250x1200x16x1xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty() : tensor<31250x1200x16x1xf32> - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [16, 1] - into %0 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> - return %pack : tensor<31250x1200x16x1xf32> -} -// CHECK-LABEL: func @fold_padding_value_pack -// CHECK-NOT: padding_value - -// ----- - -func.func @infer_src_shape_pack(%src: tensor, %dest: tensor<10x20x30x40x16xf32>) -> tensor<10x20x30x40x16xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %src - padding_value(%cst : f32) - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor -> tensor<10x20x30x40x16xf32> - return %pack : tensor<10x20x30x40x16xf32> -} -// CHECK-LABEL: func.func @infer_src_shape_pack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor to tensor<40x20x?x30xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[CAST_SRC]] {{.+}} into %[[DEST]] -// CHECK: return %[[PACK]] - -// ----- - -func.func @infer_dest_shape_pack(%src: tensor<30x20x?x10xf32>, %dest: tensor) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %src - padding_value(%cst : f32) - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor<30x20x?x10xf32> -> tensor - return %pack : tensor -} -// CHECK-LABEL: func.func @infer_dest_shape_pack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor to tensor -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] {{.+}} into %[[CAST_DEST]] -// CHECK: %[[CAST_PACK:.+]] = tensor.cast %[[PACK]] : tensor to tensor -// CHECK: return %[[CAST_PACK]] - -// ----- - -func.func @no_infer_pack_shape(%arg0: tensor, %arg1: index) -> tensor<32x7x?x16x1xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty(%arg1) : tensor<32x7x?x16x1xf32> - %pack = tensor.pack %arg0 padding_value(%cst : f32) outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor -> tensor<32x7x?x16x1xf32> - return %pack : tensor<32x7x?x16x1xf32> -} -// CHECK-LABEL: func.func @no_infer_pack_shape -// CHECK-NOT: tensor.cast - -// ----- 
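// The canonicalization patterns exercised by the tests removed in this hunk
// move to the linalg dialect together with the ops. As a sketch (assuming the
// tests are re-homed next to linalg.pack and linalg.unpack, which this hunk
// does not show), the splat-folding case above carries over verbatim modulo
// the dialect prefix:
//
//   // CHECK-LABEL: func @fold_pack_constant_splat
//   // CHECK-NOT: linalg.pack
//   // CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32>
//   func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> {
//     %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32>
//     %0 = linalg.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
//       inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32>
//     return %0 : tensor<8x16x8x32xf32>
//   }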
- -func.func @fold_padding_value_pack_negative1(%arg0: tensor<1200x499999xf32>) -> tensor<31250x1200x16x1xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty() : tensor<31250x1200x16x1xf32> - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [16, 1] - into %0 : tensor<1200x499999xf32> -> tensor<31250x1200x16x1xf32> - return %pack : tensor<31250x1200x16x1xf32> -} -// CHECK-LABEL: func @fold_padding_value_pack_negative1 -// CHECK: tensor.pack -// CHECK-SAME: padding_value - -// ----- - -func.func @fold_padding_value_pack_negative2(%arg0: tensor<1200x?xf32>, %arg1: tensor) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [16, 1] - into %arg1 : tensor<1200x?xf32> -> tensor - return %pack : tensor -} -// CHECK-LABEL: func @fold_padding_value_pack_negative2 -// CHECK: tensor.pack -// CHECK-SAME: padding_value - -// ----- - -func.func @fold_padding_value_pack_negative3(%arg0: tensor<1200x500000xf32>, %arg1: tensor, %tile : index) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %pack = tensor.pack %arg0 - padding_value(%cst : f32) - outer_dims_perm = [1, 0] - inner_dims_pos = [1, 0] - inner_tiles = [%tile, 1] - into %arg1 : tensor<1200x500000xf32> -> tensor - return %pack : tensor -} -// CHECK-LABEL: func @fold_padding_value_pack_negative3 -// CHECK: tensor.pack -// CHECK-SAME: padding_value - -// ----- - -// CHECK-LABEL: func @fold_unpack_constant_splat -// CHECK-NOT: tensor.unpack -// CHECK: arith.constant dense<1.000000e-01> : tensor<128x256xf32> -func.func @fold_unpack_constant_splat(%dest : tensor<128x256xf32>) -> tensor<128x256xf32> { - %cst = arith.constant dense<1.000000e-01> : tensor<16x8x8x32xf32> - %0 = tensor.unpack %cst inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32> - return %0 : tensor<128x256xf32> -} - -// ----- - -func.func @infer_dest_shape_unpack(%src: tensor<10x20x30x40x16xf32>, %dest: tensor) -> tensor { - %unpack = tensor.unpack %src - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor<10x20x30x40x16xf32> -> tensor - return %unpack : tensor -} -// CHECK-LABEL: func.func @infer_dest_shape_unpack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_DEST:.+]] = tensor.cast %[[DEST]] : tensor to tensor<40x20x?x30xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SRC]] {{.+}} into %[[CAST_DEST]] -// CHECK: %[[CAST_UNPACK:.+]] = tensor.cast %[[UNPACK]] : tensor<40x20x?x30xf32> to tensor -// CHECK: return %[[CAST_UNPACK]] - -// ----- - -func.func @infer_src_shape_unpack(%src: tensor, %dest: tensor<30x20x?x10xf32>) -> tensor<30x20x?x10xf32> { - %unpack = tensor.unpack %src - outer_dims_perm = [2, 1, 3, 0] - inner_dims_pos = [2] - inner_tiles = [16] - into %dest : tensor -> tensor<30x20x?x10xf32> - return %unpack : tensor<30x20x?x10xf32> -} -// CHECK-LABEL: func.func @infer_src_shape_unpack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DEST:[0-9a-zA-Z]+]] -// CHECK: %[[CAST_SRC:.+]] = tensor.cast %[[SRC]] : tensor to tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[CAST_SRC]] -// CHECK: return %[[UNPACK]] - -// ----- - -func.func @no_infer_unpack_shape(%arg1: tensor<32x7x?x16x1xf32>, %arg2: index) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.empty(%arg2) : tensor - %unpack = 
tensor.unpack %arg1 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 0] inner_tiles = [16, 1] into %0 : tensor<32x7x?x16x1xf32> -> tensor - return %unpack : tensor -} -// CHECK-LABEL: func.func @no_infer_unpack_shape -// CHECK-NOT: tensor.cast - -// ----- - - // CHECK-LABEL: func @fold_overlapping_insert // CHECK-SAME: %[[INPUT:.+]]: tensor, %{{.+}}: tensor<4x?x8xf32>, %[[SLICE2:.+]]: tensor<4x?x8xf32> func.func @fold_overlapping_insert(%input : tensor, %slice1: tensor<4x?x8xf32>, %slice2: tensor<4x?x8xf32>, %i: index, %size: index) -> (tensor) { @@ -2370,174 +2151,6 @@ func.func @collapse_expand_fold_to_cast(%t: tensor, %sz0: index) -> (tens // ----- -// Chain: NC -> NCnc -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) -// CHECK: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// Chain: NC -> NCcn -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) -// CHECK-NOT: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %t inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// Chain: NC -> CNcn -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>) -// CHECK-NOT: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %t outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 8] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// Chain: NC -> NCnc -> NCnc -> NC -// CHECK: func.func @unpack_pack( -// CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>, -// CHECK: return %[[T]] : tensor<128x128xf32> -func.func @unpack_pack(%t: tensor<128x128xf32>, %tile1: index, %tile2: index) -> tensor<128x128xf32> { - %tensor_empty = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - %tensor_empty1 = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - return %unpacked : tensor<128x128xf32> -} - -// ----- - -// 
CHECK: func.func @unpack_pack_with_padding_no_canonicalization( -// CHECK: tensor.pack -// CHECK: tensor.unpack -func.func @unpack_pack_with_padding_no_canonicalization(%t: tensor<256x512xbf16>) -> tensor<224x512xbf16> { - %tensor_empty = tensor.empty() : tensor<4x16x64x32xbf16> - %tensor_empty1 = tensor.empty() : tensor<224x512xbf16> - %packed = tensor.pack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty : tensor<256x512xbf16> -> tensor<4x16x64x32xbf16> - %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16> - return %unpacked : tensor<224x512xbf16> -} - -// ----- - -// Chain NCnc -> NC -> NC -> NCnc -// CHECK: func.func @pack_unpack( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, -// CHECK: return %[[T]] : tensor<16x16x?x?xf32> -func.func @pack_unpack(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - return %packed : tensor<16x16x?x?xf32> -} - -// ----- - -// Chain NCnc -> NC -> NC -> NCnc -// CHECK: func.func @pack_unpack( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x8x8xf32> -// CHECK: return %[[T]] : tensor<16x16x8x8xf32> -func.func @pack_unpack(%t: tensor<16x16x8x8xf32>) -> tensor<16x16x8x8xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty : tensor<16x16x8x8xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty() : tensor<16x16x8x8xf32> - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x8x8xf32> - return %packed : tensor<16x16x8x8xf32> -} - -// ----- - -// CHECK: func.func @pack_unpack_same_tiles( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK: return %[[T]] : tensor -func.func @pack_unpack_same_tiles(%t: tensor, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, - %tile1: index, %tile2: index) -> tensor { - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor -> tensor - %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor -> tensor - return %packed : tensor -} - -// ----- - -// CHECK: func.func @pack_unpack_different_tiles( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-NOT: return %[[T]] : tensor -func.func @pack_unpack_different_tiles(%t: tensor, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, - %tile1: index, %tile2: index) -> tensor { - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor -> tensor - %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile2, %tile1] into 
%tensor_empty1 : tensor -> tensor - return %packed : tensor -} - -// ----- - -// CHECK: func.func @pack_unpack_dynamic_with_padding( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-NOT: return %[[T]] : tensor -func.func @pack_unpack_dynamic_with_padding(%t: tensor, %dim1: index, %dim2: index, %dim3: index, %dim4: index, %dim5: index, %dim6: index, - %tile1: index, %tile2: index, %pad: f32) -> tensor { - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor -> tensor - %tensor_empty1 = tensor.empty(%dim3, %dim4, %dim5, %dim6) : tensor - %packed = tensor.pack %unpacked padding_value(%pad: f32) inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor -> tensor - return %packed : tensor -} - -// ----- - -// CHECK: func.func @pack_outer_dims_unpack_no_outer_dims( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, -// CHECK: return %[[T]] : tensor<16x16x?x?xf32> -func.func @pack_outer_dims_unpack_no_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %unpacked outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - return %packed : tensor<16x16x?x?xf32> -} - -// ----- - -// CHECK: func.func @pack_no_outer_dims_unpack_outer_dims( -// CHECK-SAME: %[[T:.+]]: tensor<16x16x?x?xf32>, -// CHECK: return %[[T]] : tensor<16x16x?x?xf32> -func.func @pack_no_outer_dims_unpack_outer_dims(%t: tensor<16x16x?x?xf32>, %tile1: index, %tile2: index) -> tensor<16x16x?x?xf32> { - %tensor_empty = tensor.empty() : tensor<128x128xf32> - %unpacked = tensor.unpack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty : tensor<16x16x?x?xf32> -> tensor<128x128xf32> - %tensor_empty1 = tensor.empty(%tile1, %tile2) : tensor<16x16x?x?xf32> - %packed = tensor.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [%tile1, %tile2] into %tensor_empty1 : tensor<128x128xf32> -> tensor<16x16x?x?xf32> - return %packed : tensor<16x16x?x?xf32> -} - -// ----- - // CHECK: func.func @invalid_empty_negative_size // CHECK: %[[IDX:.*]] = index.constant // CHECK: %[[T:.*]] = tensor.empty(%[[IDX]]) : tensor<4x5x?xf32> @@ -2551,22 +2164,6 @@ func.func @invalid_empty_negative_size() -> (tensor<4x5x?xf32>) { // ----- -// Fold DstStyleOp -> tensor.unpack operations. -func.func @fold_dst_style_ops_into_unpack(%arg0 : tensor, %init : tensor) -> tensor { - %cst = arith.constant 0.0 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor - %unpack = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [16, 64] into %fill : tensor -> tensor - return %unpack : tensor -} -// CHECK-LABEL: func @fold_dst_style_ops_into_unpack -// CHECK-SAME: %[[ARG0:.+]]: tensor -// CHECK-SAME: %[[INIT:.+]]: tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] -// CHECK-SAME: into %[[INIT]] -// CHECK: return %[[UNPACK]] - -// ----- - // The IR in this test case is invalid. This test checks that the canonicalizer // does not crash. 
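// The unpack(pack(x)) and pack(unpack(x)) round-trip folds removed in the
// hunks above are likewise expected to survive the move as linalg
// canonicalizations. A minimal sketch (assuming the test is re-homed with the
// linalg ops; the destination file is not part of this diff): packing and
// immediately unpacking with matching inner_dims_pos and inner_tiles still
// folds to the original value.
//
//   // CHECK: func.func @unpack_pack(
//   // CHECK-SAME: %[[T:.+]]: tensor<128x128xf32>)
//   // CHECK: return %[[T]] : tensor<128x128xf32>
//   func.func @unpack_pack(%t: tensor<128x128xf32>) -> tensor<128x128xf32> {
//     %e0 = tensor.empty() : tensor<16x16x8x8xf32>
//     %packed = linalg.pack %t inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %e0 : tensor<128x128xf32> -> tensor<16x16x8x8xf32>
//     %e1 = tensor.empty() : tensor<128x128xf32>
//     %unpacked = linalg.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %e1 : tensor<16x16x8x8xf32> -> tensor<128x128xf32>
//     return %unpacked : tensor<128x128xf32>
//   }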
@@ -2598,21 +2195,6 @@ func.func @generate_negative_size_verifies() -> tensor { return %tensor : tensor } -// ----- - -func.func @infer_and_fold_pack_unpack_same_tiles(%t: tensor<10x20x4x4xf32>) -> tensor<10x20x4x4xf32> { - %dim1 = arith.constant 40 : index - %dim2 = arith.constant 80 : index - %tensor_empty = tensor.empty(%dim1, %dim2) : tensor - %unpacked = tensor.unpack %t inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty : tensor<10x20x4x4xf32> -> tensor - %cast = tensor.cast %unpacked : tensor to tensor<40x80xf32> - %tensor_empty1 = tensor.empty() : tensor<10x20x4x4xf32> - %packed = tensor.pack %cast inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %tensor_empty1 : tensor<40x80xf32> -> tensor<10x20x4x4xf32> - return %packed : tensor<10x20x4x4xf32> -} -// CHECK-LABEL: func.func @infer_and_fold_pack_unpack_same_tiles -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK: return %[[SRC]] // ----- @@ -2787,62 +2369,6 @@ func.func @fold_cast_multiple_results(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2x return %0#1 : index } -// ----- - -// CHECK-LABEL: func.func @fold_cast_pack_dynamic_tile_size -// CHECK-SAME: %[[DEST:.*]]: tensor<1x1x8x1xi32>, -// CHECK-SAME: %[[SRC:.*]]: tensor<7x?xi32>, -// CHECK-SAME: %[[PAD:.*]]: i32) -> tensor<1x1x8x1xi32> { -// CHECK: %[[PACK:.*]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : i32) -// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] -// CHECK-SAME: test_attr -// CHECK-SAME: : tensor<7x?xi32> -> tensor<1x1x8x1xi32> -// CHECK: return %[[PACK]] : tensor<1x1x8x1xi32> -func.func @fold_cast_pack_dynamic_tile_size( - %dest: tensor<1x1x8x1xi32>, - %src: tensor<7x?xi32>, - %pad: i32) -> tensor<1x1x8x1xi32> { - - %cast = tensor.cast %dest : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> - %c8 = arith.constant 8 : index - %pack = tensor.pack %src padding_value(%pad : i32) - inner_dims_pos = [0, 1] - inner_tiles = [%c8, 1] - into %cast {test_attr} : tensor<7x?xi32> -> tensor<1x1x?x1xi32> - %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32> - return %res : tensor<1x1x8x1xi32> -} - -// ----- - -// CHECK-LABEL: func.func @fold_cast_unpack_dynamic_tile_size( -// CHECK-SAME: %[[SRC:.*]]: tensor<1x1x8x1xi32>, -// CHECK-SAME: %[[DEST:.*]]: tensor<7x?xi32>) -> tensor<7x?xi32> { -// CHECK: %[[RES:.*]] = tensor.unpack %[[SRC]] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] {test_attr} : tensor<1x1x8x1xi32> -> tensor<7x?xi32> -// CHECK: return %[[RES]] : tensor<7x?xi32> -func.func @fold_cast_unpack_dynamic_tile_size( - %src: tensor<1x1x8x1xi32>, - %res: tensor<7x?xi32>) -> tensor<7x?xi32> { - - %cast = tensor.cast %src : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32> - %c8 = arith.constant 8 : index - %unpack = tensor.unpack %cast - inner_dims_pos = [0, 1] - inner_tiles = [%c8, 1] - into %res {test_attr} : tensor<1x1x?x1xi32> -> tensor<7x?xi32> - return %unpack : tensor<7x?xi32> -} - -// ----- - -// CHECK-LABEL: func.func @pack_dont_drop_attributes( -// CHECK: tensor.pack {{.*}} {test_attr} -func.func @pack_dont_drop_attributes(%arg0: tensor, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> { - %c32_i64 = arith.constant 32 : i64 - %cst = arith.constant 0.000000e+00 : f16 - %pack = tensor.pack %arg0 padding_value(%cst : f16) outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %arg1 {test_attr} : tensor -> tensor<128x?x100x16x1xf16> - return %pack : tensor<128x?x100x16x1xf16> -} // ----- diff --git a/mlir/test/Dialect/Tensor/fold-empty-op.mlir 
b/mlir/test/Dialect/Tensor/fold-empty-op.mlir index 850bbcee34020..7b11c9f43c7ec 100644 --- a/mlir/test/Dialect/Tensor/fold-empty-op.mlir +++ b/mlir/test/Dialect/Tensor/fold-empty-op.mlir @@ -61,77 +61,6 @@ func.func @rank_reducing_empty_tensor_extract(%sz : index, %idx : index) -> tens return %r: tensor<2xf32> } -func.func @pack_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { - %empty_unpacked = tensor.empty() : tensor<256x256xf32> - %packed = tensor.pack %empty_unpacked - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> - return %packed : tensor<8x8x32x32xf32> -} - -// CHECK-LABEL: func.func @pack_empty( -// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> -// CHECK-NOT: tensor.pack -// CHECK: return %[[T]] : tensor<8x8x32x32xf32> - -func.func @pack_empty_dynamic(%arg0: tensor, %dim0: index, %dim1: index) -> tensor { - %empty_unpacked = tensor.empty(%dim0, %dim1) : tensor - %packed = tensor.pack %empty_unpacked - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor -> tensor - return %packed : tensor -} - -// CHECK-LABEL: func.func @pack_empty_dynamic( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index, -// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index -// CHECK-NOT: tensor.pack -// CHECK: return %[[T]] : tensor - -func.func @unpack_empty(%arg0: tensor<256x256xf32>) -> tensor<256x256xf32> { - %empty_packed = tensor.empty() : tensor<8x8x32x32xf32> - %unpacked = tensor.unpack %empty_packed - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor<8x8x32x32xf32> -> tensor<256x256xf32> - return %unpacked : tensor<256x256xf32> -} - -// CHECK-LABEL: func.func @unpack_empty( -// CHECK-SAME: %[[T:.+]]: tensor<256x256xf32> -// CHECK-NOT: tensor.unpack -// CHECK: return %[[T]] : tensor<256x256xf32> - -func.func @unpack_empty_dynamic(%arg0: tensor, %dim0: index, %dim1: index) -> tensor { - %empty_packed = tensor.empty(%dim0, %dim1) : tensor - %unpacked = tensor.unpack %empty_packed - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor -> tensor - return %unpacked : tensor -} - -// CHECK-LABEL: func.func @unpack_empty_dynamic( -// CHECK-SAME: %[[T:.+]]: tensor, -// CHECK-SAME: %[[DIM0:[a-zA-Z0-9_]+]]: index, -// CHECK-SAME: %[[DIM1:[a-zA-Z0-9_]+]]: index -// CHECK-NOT: tensor.unpack -// CHECK: return %[[T]] : tensor - -func.func @pack_padded_empty(%arg0: tensor<8x8x32x32xf32>) -> tensor<8x8x32x32xf32> { - %pad = arith.constant 1.0 : f32 - %empty_unpacked = tensor.empty() : tensor<256x256xf32> - %packed = tensor.pack %empty_unpacked - padding_value(%pad : f32) - inner_dims_pos = [0, 1] inner_tiles = [32, 32] - into %arg0 : tensor<256x256xf32> -> tensor<8x8x32x32xf32> - return %packed : tensor<8x8x32x32xf32> -} - -// CHECK-LABEL: func.func @pack_padded_empty( -// CHECK-SAME: %[[T:.+]]: tensor<8x8x32x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack -// CHECK: return %[[PACK]] : tensor<8x8x32x32xf32> - // ----- module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir index bff913f5f55fe..84eb60248b8be 100644 --- a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir +++ b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt -split-input-file -test-tensor-transform-patterns=test-fold-into-pack-and-unpack %s | FileCheck %s +// RUN: mlir-opt -split-input-file 
-test-linalg-transform-patterns=test-fold-into-pack-and-unpack %s | FileCheck %s func.func @fold_unpack_slice(%arg0 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, 0] [%arg2, %arg3] [1, 1] : tensor to tensor return %1 : tensor @@ -13,7 +13,7 @@ func.func @fold_unpack_slice(%arg0 : tensor, %arg1 : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 4] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 4] // CHECK-SAME: into %[[INIT]] // CHECK: return %[[UNPACK]] @@ -21,39 +21,39 @@ func.func @fold_unpack_slice(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index, %arg4 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, %arg4] [%arg2, %arg3] [1, 1] : tensor to tensor return %1 : tensor } // CHECK-LABEL: func @nofold_unpack_slice_non_zero_offset( -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: tensor.extract_slice %[[UNPACK]] // ----- func.func @nofold_unpack_slice_non_unit_stride(%arg0 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index, %arg4 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, 0] [%arg2, %arg3] [%arg4, 1] : tensor to tensor return %1 : tensor } // CHECK-LABEL: func @nofold_unpack_slice_non_unit_stride( -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: tensor.extract_slice %[[UNPACK]] // ----- func.func @nofold_unpack_slice_rank_reduced(%arg0 : tensor, %arg1 : tensor, %arg2 : index, %arg3 : index) -> tensor { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1 : tensor -> tensor %1 = tensor.extract_slice %0[0, 0] [1, 1] [1, 1] : tensor to tensor return %1 : tensor } // CHECK-LABEL: func @nofold_unpack_slice_rank_reduced( -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: tensor.extract_slice %[[UNPACK]] // ----- @@ -66,7 +66,7 @@ func.func @pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32> { tensor.yield %cst : f32 } : tensor<16641x16xf32> to tensor<16656x16xf32> %empty = tensor.empty() : tensor<2082x1x8x32xf32> - %pack = tensor.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty + %pack = linalg.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty : tensor<16656x16xf32> -> tensor<2082x1x8x32xf32> return %pack : tensor<2082x1x8x32xf32> } @@ -74,7 +74,7 @@ func.func @pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32> { // CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]] // CHECK: %[[PAD_VAL:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<2082x1x8x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] // CHECK-SAME: 
padding_value(%[[PAD_VAL]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %[[DEST]] @@ -88,13 +88,13 @@ func.func @nofold_pad_pack(%src: tensor<16641x16xf32>) -> tensor<2082x1x8x32xf32 tensor.yield %cst : f32 } : tensor<16641x16xf32> to tensor<16656x16xf32> %empty = tensor.empty() : tensor<2082x1x8x32xf32> - %pack = tensor.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty + %pack = linalg.pack %padded padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty : tensor<16656x16xf32> -> tensor<2082x1x8x32xf32> return %pack : tensor<2082x1x8x32xf32> } // CHECK-LABEL: func.func @nofold_pad_pack // CHECK: tensor.pad -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -107,19 +107,19 @@ func.func @pad_pack_different_padding_value(%src: tensor<16641x16xf32>) -> tenso tensor.yield %cst0 : f32 } : tensor<16641x16xf32> to tensor<16656x16xf32> %empty = tensor.empty() : tensor<2082x1x8x32xf32> - %pack = tensor.pack %padded padding_value(%cst1 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty + %pack = linalg.pack %padded padding_value(%cst1 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty : tensor<16656x16xf32> -> tensor<2082x1x8x32xf32> return %pack : tensor<2082x1x8x32xf32> } // CHECK-LABEL: func.func @pad_pack_different_padding_value // CHECK: tensor.pad -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- -func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { +func.func @linalg.pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<56x2x1x57x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [3] inner_tiles = [32] @@ -132,10 +132,10 @@ func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> t permutation = [2, 3, 0, 1, 4] return %transposed : tensor<1x57x56x2x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold( +// CHECK: func @linalg.pack_linalg_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -143,9 +143,9 @@ func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> t // ----- -func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { +func.func @linalg.pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<56x2x1x57x32xf32> - %pack = tensor.pack %arg0 padding_value(%padding : f32) + %pack = linalg.pack %arg0 padding_value(%padding : f32) outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [3] inner_tiles = [32] @@ -158,10 +158,10 @@ func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x permutation = [2, 3, 0, 1, 4] return %transposed : tensor<1x57x56x2x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_with_padding( +// CHECK: func @linalg.pack_linalg_transpose_fold_with_padding( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x55xf32>, %[[PADDING:.+]]: f32) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: 
%[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -169,9 +169,9 @@ func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x // ----- -func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x2x56x57x32xf32> { +func.func @linalg_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x2x56x57x32xf32> { %0 = tensor.empty() : tensor<56x57x1x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<56x57x1x64xf32> -> tensor<56x57x1x2x32xf32> @@ -183,10 +183,10 @@ func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56 permutation = [2, 3, 0, 1, 4] return %transposed : tensor<1x2x56x57x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm( +// CHECK: func @linalg_pack_linalg_transpose_fold_no_outer_dims_perm( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x2x56x57x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 3, 0, 1] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -194,9 +194,9 @@ func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56 // ----- -func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<12x56x4x9x32x8x2xf32> { +func.func @linalg_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<12x56x4x9x32x8x2xf32> { %0 = tensor.empty() : tensor<4x9x12x56x8x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [3, 1, 2, 0] inner_dims_pos = [1, 2, 3] inner_tiles = [8, 2, 32] @@ -209,10 +209,10 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<5 permutation = [2, 3, 0, 1, 6, 4, 5] return %transposed : tensor<12x56x4x9x32x8x2xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_transpose( +// CHECK: func @linalg_pack_linalg_transpose_fold_tile_dims_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<12x56x4x9x32x8x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 0, 3, 1] // CHECK-SAME: inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] // CHECK-SAME: into %[[INIT]] @@ -220,9 +220,9 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<5 // ----- -func.func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<9x56x2x12x32x8x4xf32> { +func.func @linalg_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<9x56x2x12x32x8x4xf32> { %0 = tensor.empty() : tensor<4x12x9x56x8x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [3, 2, 1, 0] inner_dims_pos = [1, 2, 3] inner_tiles = [8, 2, 32] @@ -235,16 +235,16 @@ func.func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg permutation = [2, 3, 5, 1, 6, 4, 0] return %transposed :
tensor<9x56x2x12x32x8x4xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose( +// CHECK: func @linalg_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>) -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: linalg.transpose // ----- -func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56x?x?x64xf32>) -> tensor<?x?x56x2x32xf32> { +func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56x?x?x64xf32>) -> tensor<?x?x56x2x32xf32> { %0 = tensor.empty() : tensor<56x2x1x57x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 outer_dims_perm = [0, 3, 2, 1] inner_dims_pos = [3] inner_tiles = [32] @@ -259,14 +259,14 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56 %return_value = tensor.cast %transposed : tensor<1x57x56x2x32xf32> to tensor<?x?x56x2x32xf32> return %return_value : tensor<?x?x56x2x32xf32> } -// CHECK: func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims( +// CHECK: func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x64xf32>) // CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index // CHECK: %[[dim:.+]] = tensor.dim %[[ARG0]], %[[c1]] : tensor<56x?x?x64xf32> // CHECK: %[[dim_0:.+]] = tensor.dim %[[ARG0]], %[[c2]] : tensor<56x?x?x64xf32> // CHECK: %[[INIT:.+]] = tensor.empty(%[[dim_0]], %[[dim]]) : tensor<?x?x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -274,9 +274,9 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56 // ----- -func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: tensor<56x?x?x128xf32>) -> tensor<?x?x56x9x32x8x2xf32> { +func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: tensor<56x?x?x128xf32>) -> tensor<?x?x56x9x32x8x2xf32> { %0 = tensor.empty() : tensor<56x9x12x4x8x2x32xf32> - %pack = tensor.pack %arg0 + %pack = linalg.pack %arg0 inner_dims_pos = [1, 2, 3] inner_tiles = [8, 2, 32] into %0 : tensor<56x?x?x128xf32> -> tensor<56x9x12x4x8x2x32xf32> @@ -292,7 +292,7 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: } // CHECK-DAG: #[[$MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> // CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> -// CHECK-LABEL: func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims( +// CHECK-LABEL: func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x128xf32>) // CHECK-DAG: %[[c1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[c2:.+]] = arith.constant 2 : index @@ -301,15 +301,15 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: // CHECK: %[[mapped_dim1:.+]] = affine.apply #[[$MAP0]]()[%[[dim]]] // CHECK: %[[mapped_dim2:.+]] = affine.apply #[[$MAP1]]()[%[[dim_0]]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[mapped_dim2]], %[[mapped_dim1]]) : tensor<?x4x56x?x32x8x2xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [2, 3, 0, 1] inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] into %[[INIT]] : tensor<56x?x?x128xf32> -> tensor<?x4x56x?x32x8x2xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [2, 3, 0, 1] inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] into %[[INIT]] : tensor<56x?x?x128xf32> -> tensor<?x4x56x?x32x8x2xf32> // CHECK: %[[CAST:.+]] = tensor.cast
%[[PACK]] : tensor<?x4x56x?x32x8x2xf32> to tensor<?x?x56x9x32x8x2xf32> // CHECK: return %[[CAST]] : tensor<?x?x56x9x32x8x2xf32> // CHECK: } // ----- -func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { - %pack = tensor.pack %arg0 +func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { + %pack = linalg.pack %arg0 outer_dims_perm = [3, 0, 2, 1] inner_dims_pos = [1, 2, 3] inner_tiles = [%tile_p, %tile_q, %tile_r] @@ -324,7 +324,7 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_s } // CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)> // CHECK: module { -// CHECK: func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes( +// CHECK: func.func @linalg_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, // CHECK-SAME: %[[PACK_DEST:.+]]: tensor<?x?x?x?x?x?x?xf32>, %[[TRANSPOSE_DEST:.+]]: tensor<?x?x?x?x?x?x?xf32>, // CHECK-SAME: %[[ARG1:.+]]: index, %[[ARG2:.+]]: index, @@ -341,13 +341,13 @@ func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_s // CHECK: %[[mapped_dim1:.+]] = affine.apply #[[$MAP]]()[%[[dim_0]], %[[ARG1]]] // CHECK: %[[mapped_dim2:.+]] = affine.apply #[[$MAP]]()[%[[dim_1]], %[[ARG2]]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[mapped_dim2]], %[[mapped_dim1]], %[[mapped_dim0]], %[[dim]], %[[ARG3]], %[[ARG1]], %[[ARG2]]) : tensor<?x?x?x?x?x?x?xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1, 2] inner_tiles = [%[[ARG3]], %[[ARG1]], %[[ARG2]]] into %[[INIT]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [2, 1, 3, 0] inner_dims_pos = [3, 1, 2] inner_tiles = [%[[ARG3]], %[[ARG1]], %[[ARG2]]] into %[[INIT]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> // CHECK: return %[[PACK]] : tensor<?x?x?x?x?x?x?xf32> // CHECK: } // ----- -func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { +func.func @linalg_transpose_linalg_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<1x56x57x64xf32> %transposed = linalg.transpose ins(%arg0 : tensor<56x57x1x64xf32>) @@ -355,17 +355,17 @@ func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> t permutation = [2, 0, 1, 3] %1 = tensor.empty() : tensor<1x57x56x2x32xf32> - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed outer_dims_perm = [0, 2, 1, 3] inner_dims_pos = [3] inner_tiles = [32] into %1 : tensor<1x56x57x64xf32> -> tensor<1x57x56x2x32xf32> return %pack : tensor<1x57x56x2x32xf32> } -//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold( +//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -373,7 +373,7 @@ func.func @linalg_transpose_tensor_pack_fold(%arg0: tensor<56x57x1x64xf32>) -> t // ----- -func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { +func.func
@linalg_transpose_linalg_pack_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> { %0 = tensor.empty() : tensor<1x56x57x55xf32> %transpose = linalg.transpose ins(%arg0 : tensor<56x57x1x55xf32>) @@ -381,17 +381,17 @@ func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x permutation = [2, 0, 1, 3] %1 = tensor.empty() : tensor<1x57x56x2x32xf32> - %pack = tensor.pack %transpose padding_value(%padding : f32) + %pack = linalg.pack %transpose padding_value(%padding : f32) outer_dims_perm = [0, 2, 1, 3] inner_dims_pos = [3] inner_tiles = [32] into %1 : tensor<1x56x57x55xf32> -> tensor<1x57x56x2x32xf32> return %pack : tensor<1x57x56x2x32xf32> } -//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold_with_padding( +//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold_with_padding( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x55xf32>, %[[PADDING:.+]]: f32) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] padding_value(%[[PADDING]] : f32) // CHECK-SAME: outer_dims_perm = [2, 1, 0, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -399,7 +399,7 @@ func.func @linalg_transpose_tensor_pack_fold_with_padding(%arg0: tensor<56x57x1x // ----- -func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x56x57x2x32xf32> { +func.func @linalg_transpose_linalg_pack_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x56x57x2x32xf32> { %0 = tensor.empty() : tensor<1x56x57x64xf32> %transposed = linalg.transpose ins(%arg0 : tensor<56x57x1x64xf32>) @@ -407,16 +407,16 @@ func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56 permutation = [2, 0, 1, 3] %1 = tensor.empty() : tensor<1x56x57x2x32xf32> - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed inner_dims_pos = [3] inner_tiles = [32] into %1 : tensor<1x56x57x64xf32> -> tensor<1x56x57x2x32xf32> return %pack : tensor<1x56x57x2x32xf32> } -//CHECK-LABEL: func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm( +//CHECK-LABEL: func @linalg_transpose_linalg_pack_fold_no_outer_dims_perm( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x56x57x2x32xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 0, 1, 3] // CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[INIT]] @@ -424,25 +424,25 @@ func.func @linalg_transpose_tensor_pack_fold_no_outer_dims_perm(%arg0: tensor<56 // ----- -func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change(%arg0: tensor<25x30x35x40xf32>, %transpose_dest: tensor<35x40x25x30xf32>, %pack_dest: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> { +func.func @linalg_transpose_linalg_pack_fold_complex_inner_dims_change(%arg0: tensor<25x30x35x40xf32>, %transpose_dest: tensor<35x40x25x30xf32>, %pack_dest: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> { %transposed = linalg.transpose ins(%arg0 : tensor<25x30x35x40xf32>) outs(%transpose_dest : tensor<35x40x25x30xf32>) permutation = [2, 3, 0, 1] - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed outer_dims_perm = [3, 0, 2, 1] inner_dims_pos = [1, 3, 2] inner_tiles = [5, 10, 5] into %pack_dest : tensor<35x40x25x30xf32> ->
tensor<3x35x5x8x5x10x5xf32> return %pack : tensor<3x35x5x8x5x10x5xf32> } -//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change( +//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_fold_complex_inner_dims_change( // CHECK-SAME: %[[ARG0:.+]]: tensor<25x30x35x40xf32>, // CHECK-SAME: %[[ARG1:.+]]: tensor<35x40x25x30xf32>, // CHECK-SAME: %[[ARG2:.+]]: tensor<3x35x5x8x5x10x5xf32>) -> tensor<3x35x5x8x5x10x5xf32> { // CHECK: %[[VAL0:.+]] = tensor.empty() : tensor<3x35x5x8x5x10x5xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 2, 0, 3] // CHECK-SAME: inner_dims_pos = [3, 1, 0] // CHECK-SAME: inner_tiles = [5, 10, 5] @@ -451,13 +451,13 @@ func.func @linalg_transpose_tensor_pack_fold_complex_inner_dims_change(%arg0: te // ----- -func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { +func.func @linalg_transpose_linalg_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %pack_dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> { %transposed = linalg.transpose ins(%arg0 : tensor<?x?x?x?xf32>) outs(%transpose_dest : tensor<?x?x?x?xf32>) permutation = [2, 3, 0, 1] - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed outer_dims_perm = [3, 0, 2, 1] inner_dims_pos = [1, 3, 2] inner_tiles = [%tile_p, %tile_q, %tile_r] @@ -465,7 +465,7 @@ func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_s return %pack : tensor<?x?x?x?x?x?x?xf32> } // CHECK: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)> -//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes( +//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_fold_dynamic_outer_dims_tile_dims_tile_sizes( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?x?xf32>, // CHECK-SAME: %[[ARG2:.+]]: tensor<?x?x?x?x?x?x?xf32>, %[[ARG3:.+]]: index, %[[ARG4:.+]]: index, %[[ARG5:.+]]: index) -> tensor<?x?x?x?x?x?x?xf32> { // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index @@ -480,12 +480,12 @@ func.func @linalg_transpose_tensor_pack_fold_dynamic_outer_dims_tile_dims_tile_s // CHECK: %[[VAL1:.+]] = affine.apply #[[$MAP]]()[%[[DIM0]], %[[ARG4]]] // CHECK: %[[VAL2:.+]] = affine.apply #[[$MAP]]()[%[[DIM]], %[[ARG5]]] // CHECK: %[[VAL3:.+]] = tensor.empty(%[[VAL1]], %[[DIM1]], %[[VAL2]], %[[VAL0]], %[[ARG3]], %[[ARG4]], %[[ARG5]]) : tensor<?x?x?x?x?x?x?xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [1, 2, 0, 3] inner_dims_pos = [3, 1, 0] inner_tiles = [%[[ARG3]], %[[ARG4]], %[[ARG5]]] into %[[VAL3]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [1, 2, 0, 3] inner_dims_pos = [3, 1, 0] inner_tiles = [%[[ARG3]], %[[ARG4]], %[[ARG5]]] into %[[VAL3]] : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32> // CHECK: return %[[PACK]] : tensor<?x?x?x?x?x?x?xf32> // ----- -func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> { +func.func @linalg_transpose_linalg_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : bf16 %dim = tensor.dim %arg0, %c0 : tensor<?x32x128xbf16> @@ -497,7 +497,7 @@ func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16> - %pack = tensor.pack %transposed + %pack = linalg.pack %transposed padding_value(%cst : bf16) outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] @@ -506,14 +506,14 @@ func.func
@linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16> } // CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> -//CHECK-LABEL: func.func @linalg_transpose_tensor_pack_multiple_tiles( +//CHECK-LABEL: func.func @linalg_transpose_linalg_pack_multiple_tiles( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x32x128xbf16>) -> tensor<32x?x64x16x2xbf16> { // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index // CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : bf16 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x32x128xbf16> // CHECK: %[[VAL0:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]] // CHECK: %[[VAL1:.+]] = tensor.empty(%[[VAL0]]) : tensor<32x?x64x16x2xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] // CHECK-SAME: padding_value(%[[CST]] : bf16) // CHECK-SAME: outer_dims_perm = [1, 0, 2] // CHECK-SAME: inner_dims_pos = [0, 2] @@ -524,23 +524,23 @@ func.func @linalg_transpose_tensor_pack_multiple_tiles(%arg0: tensor<?x32x128xbf16> // ----- -func.func @linalg_transpose_tensor_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> { +func.func @linalg_transpose_linalg_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> { %0 = tensor.empty() : tensor<1x1x16x4xi32> %transposed = linalg.transpose ins(%arg0 : tensor<1x1x4x16xi32>) outs(%0 : tensor<1x1x16x4xi32>) permutation = [1, 0, 3, 2] %1 = tensor.empty() : tensor<16x4xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %1 : tensor<1x1x16x4xi32> -> tensor<16x4xi32> return %unpack : tensor<16x4xi32> } -//CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold( +//CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x4x16xi32>) -> tensor<16x4xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<16x4xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [4, 16] @@ -550,23 +550,23 @@ func.func @linalg_transpose_tensor_unpack_fold(%arg0: tensor<1x1x4x16xi32>) -> t // ----- -func.func @linalg_transpose_tensor_unpack_fold_partial_tile(%arg0: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> { +func.func @linalg_transpose_linalg_unpack_fold_partial_tile(%arg0: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> { %0 = tensor.empty() : tensor<1x1x16x4xi32> %transposed = linalg.transpose ins(%arg0 : tensor<1x1x4x16xi32>) outs(%0 : tensor<1x1x16x4xi32>) permutation = [1, 0, 3, 2] %1 = tensor.empty() : tensor<15x3xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 4] into %1 : tensor<1x1x16x4xi32> -> tensor<15x3xi32> return %unpack : tensor<15x3xi32> } -//CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold_partial_tile( +//CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold_partial_tile( // CHECK-SAME: %[[ARG0:.+]]: tensor<1x1x4x16xi32>) -> tensor<15x3xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<15x3xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [4, 16] @@ -576,20 +576,20 @@ func.func @linalg_transpose_tensor_unpack_fold_partial_tile(%arg0: tensor<1x1x4x // ----- -func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %unpack_dest: tensor<?x?xf32>,
%tile_p : index, %tile_q : index) -> tensor<?x?xf32> { +func.func @linalg_transpose_linalg_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %transpose_dest: tensor<?x?x?x?xf32>, %unpack_dest: tensor<?x?xf32>, %tile_p : index, %tile_q : index) -> tensor<?x?xf32> { %transposed = linalg.transpose ins(%arg0 : tensor<?x?x?x?xf32>) outs(%transpose_dest : tensor<?x?x?x?xf32>) permutation = [1, 0, 3, 2] - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [%tile_p, %tile_q] into %unpack_dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32> return %unpack : tensor<?x?xf32> } -// CHECK-LABEL: func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes( +// CHECK-LABEL: func.func @linalg_transpose_linalg_unpack_fold_dynamic_outer_dims_tile_dims_tile_sizes( // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>, // CHECK-SAME: %[[IDX1:.+]]: index, %[[IDX2:.+]]: index) -> tensor<?x?xf32> { // CHECK-DAG: %[[CST1:.+]] = arith.constant 1 : index @@ -597,7 +597,7 @@ func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile // CHECK-DAG: %[[DIM0:.+]] = tensor.dim %[[ARG2]], %[[CST0]] : tensor<?x?xf32> // CHECK-DAG: %[[DIM1:.+]] = tensor.dim %[[ARG2]], %[[CST1]] : tensor<?x?xf32> // CHECK: %[[OUT:.+]] = tensor.empty(%[[DIM0]], %[[DIM1]]) : tensor<?x?xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [%[[IDX2]], %[[IDX1]]] @@ -607,9 +607,9 @@ func.func @linalg_transpose_tensor_unpack_fold_dynamic_outer_dims_tile_dims_tile // ----- -func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> { +func.func @linalg_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> { %0 = tensor.empty() : tensor<56x3648xf32> - %pack = tensor.unpack %arg0 + %pack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [1, 64] @@ -622,10 +622,10 @@ func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> permutation = [1,0] return %transposed : tensor<3648x56xf32> } -// CHECK-LABEL: func.func @tensor_unpack_linalg_transpose_fold( +// CHECK-LABEL: func.func @linalg_unpack_linalg_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>) -> tensor<3648x56xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x56xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [1, 64] @@ -637,7 +637,7 @@ func.func @tensor_unpack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16xf32>) -> tensor<100x71x64xf32> { %0 = tensor.empty() : tensor<71x100x64xf32> - %pack = tensor.unpack %arg0 + %pack = linalg.unpack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %0 : tensor<71x7x4x16x16xf32> -> tensor<71x100x64xf32> @@ -652,7 +652,7 @@ func.func @tensor_padded_unpack_linalg_transpose_fold(%arg0: tensor<71x7x4x16x16 // CHECK-LABEL: func.func @tensor_padded_unpack_linalg_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<71x7x4x16x16xf32>) -> tensor<100x71x64xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<100x71x64xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm
= [1, 0, 2] // CHECK-SAME: inner_dims_pos = [0, 2] // CHECK-SAME: inner_tiles = [16, 16] @@ -668,7 +668,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) - outs(%0 : tensor<5x2x3x16x4xi32>) permutation = [2, 0, 1, 4, 3] %1 = tensor.empty() : tensor<5x48x8xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 2, 1] inner_dims_pos = [1, 2] inner_tiles = [16, 4] into @@ -678,7 +678,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) - //CHECK-LABEL: func.func @non_involution_transpose_unpack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0] // CHECK-SAME: inner_dims_pos = [2, 1] // CHECK-SAME: inner_tiles = [4, 16] @@ -690,7 +690,7 @@ func.func @non_involution_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) - func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { %0 = tensor.empty() : tensor<3x56x3648xf32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [1, 64] @@ -706,7 +706,7 @@ func.func @unpack_non_involution_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) // CHECK-LABEL: func.func @unpack_non_involution_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] // CHECK-SAME: inner_dims_pos = [2, 0] // CHECK-SAME: inner_tiles = [1, 64] @@ -722,7 +722,7 @@ func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> ten outs(%0 : tensor<5x2x3x16x4xi32>) permutation = [2, 0, 4, 1, 3] %1 = tensor.empty() : tensor<5x32x12xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed inner_dims_pos = [1, 2] inner_tiles = [16, 4] into %1 : tensor<5x2x3x16x4xi32> -> tensor<5x32x12xi32> @@ -730,7 +730,7 @@ func.func @transpose_unpacked_dims_no_fold(%arg0: tensor<2x16x5x4x3xi32>) -> ten } //CHECK-LABEL: func.func @transpose_unpacked_dims_no_fold( // CHECK: linalg.transpose -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -747,7 +747,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso linalg.yield %in : i32 } -> tensor<5x2x3x16x4xi32> %1 = tensor.empty() : tensor<5x48x8xi32> - %unpack = tensor.unpack %transposed + %unpack = linalg.unpack %transposed outer_dims_perm = [0, 2, 1] inner_dims_pos = [1, 2] inner_tiles = [16, 4] into @@ -757,7 +757,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso //CHECK-LABEL: func.func @generic_transpose_unpack_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3x5x4x16xi32>) -> tensor<5x48x8xi32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<5x48x8xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [2, 1, 0] // CHECK-SAME: inner_dims_pos = [2, 1] // CHECK-SAME: inner_tiles = [4, 16] @@ -771,7 +771,7 @@ func.func @generic_transpose_unpack_fold(%arg0: tensor<2x3x5x4x16xi32>) -> tenso #map1 = affine_map<(d0, d1, d2)->(d0, d1, d2)> 
func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { %0 = tensor.empty() : tensor<3x56x3648xf32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 outer_dims_perm = [2, 0, 1] inner_dims_pos = [1, 2] inner_tiles = [1, 64] @@ -791,7 +791,7 @@ func.func @unpack_generic_transpose_fold(%arg0: tensor<57x3x56x1x64xf32>) -> ten // CHECK-LABEL: func.func @unpack_generic_transpose_fold( // CHECK-SAME: %[[ARG0:.+]]: tensor<57x3x56x1x64xf32>) -> tensor<3648x3x56xf32> { // CHECK: %[[OUT:.+]] = tensor.empty() : tensor<3648x3x56xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] // CHECK-SAME: inner_dims_pos = [2, 0] // CHECK-SAME: inner_tiles = [1, 64] diff --git a/mlir/test/Dialect/Tensor/tiling.mlir b/mlir/test/Dialect/Tensor/tiling.mlir index 193fbe93e0f9e..04a99b5fd0d68 100644 --- a/mlir/test/Dialect/Tensor/tiling.mlir +++ b/mlir/test/Dialect/Tensor/tiling.mlir @@ -224,495 +224,3 @@ module attributes {transform.with_named_sequence} { transform.yield } } - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)> -// CHECK: func.func @NC_to_NCnc -// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) { -// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) { -// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [64, 128] [1, 1] : tensor<128x256xf32> to tensor<64x128xf32> -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32> -// CHECK: %[[SUB_RES:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32> -// CHECK: } -// CHECK: scf.yield %[[RES1:.*]] : tensor<4x8x32x32xf32> -// CHECK: } -// CHECK: return %[[RES0:.*]] : tensor<4x8x32x32xf32> -// CHECK: } -func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32> - return %0 : tensor<4x8x32x32xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)> -// CHECK: func.func @KC_to_CKkc -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: 
-// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index -// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]] -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]]) -// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK-SAME: [0, %[[IN_C]]] [128, 16] -// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8] -// CHECK: tensor.pack -// CHECK-SAME: %[[INPUT_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] -// CHECK-SAME: into %[[OUTPUT_SLICE]] -func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> - return %0 : tensor<32x4x32x8xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)> -// CHECK: func.func @pad_and_pack_static( -// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) { -// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]]) -// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]]) -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1] -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1] -// CHECK: %[[SUB_RES:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] -// CHECK-SAME: into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32> -// CHECK: } -// CHECK: return %[[RES0:.*]] : tensor<2x8x8x2xf32> -// CHECK: } -func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32> - return %0 : tensor<2x8x8x2xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// 
CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)> -// CHECK: func.func @pad_and_pack_partially_dynamic( -// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x8x2xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<?x?x8x2xf32> { -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x8x2xf32> -// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x8x2xf32> -// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x8x2xf32>) { -// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x8x2xf32>) { -// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] -// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] -// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]]) -// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]] -// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]]) -// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]] -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor<?x?x8x2xf32> to tensor<?x?x8x2xf32> -// CHECK: %[[SUB_RES:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] -// CHECK-SAME: into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<?x?x8x2xf32> -// CHECK: } -// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x8x2xf32> -// CHECK: } -// CHECK: return %[[VAL_34:.*]] : tensor<?x?x8x2xf32> -// CHECK: } -func.func @pad_and_pack_partially_dynamic(%input: tensor<?x?xf32>, %output: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<?x?xf32> -> tensor<?x?x8x2xf32> - return %0 : tensor<?x?x8x2xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)> -// CHECK: func.func @pad_and_pack_fully_dynamic( -// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>, -// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x?x?xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32, -// CHECK-SAME: %[[TILE_0:.*]]: index, -// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor<?x?x?x?xf32> { -// CHECK-DAG:
%[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x?x?xf32> -// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x?x?xf32> -// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x?x?xf32>) { -// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x?x?xf32>) { -// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]] -// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]] -// CHECK-DAG: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]] -// CHECK-DAG: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]] -// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]] -// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]] -// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]] -// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]] -// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> -// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[ITER1]], %[[C2]] -// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[ITER1]], %[[C3]] -// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32> -// CHECK: %[[PACK:.*]] = tensor.pack -// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]] -// CHECK-SAME: into %[[SUB_OUT]] -// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]] -// CHECK: scf.yield %[[INSERT]] : tensor<?x?x?x?xf32> -// CHECK: } -// CHECK: scf.yield %[[RES1:.*]] : tensor<?x?x?x?xf32> -// CHECK: } -// CHECK: return %[[RES0:.*]] : tensor<?x?x?x?xf32> -// CHECK: } -func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> { - %0 = tensor.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32> - return %0 : tensor<?x?x?x?xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)> -// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)> -// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)> -// CHECK: func.func @NCnc_to_NC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] =
arith.constant 4 : index -// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index -// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index -// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]] -// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]]) -// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]]) -// CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]]) -// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]]) -// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]]) -// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]]) -// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16] -// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor<?x?x32x16xf32> -// CHECK: %[[EMPTY:.+]] = tensor.empty -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] -// CHECK-SAME: into %[[EMPTY]] -// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] -// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] -// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4] -// CHECK: scf.yield %[[RES]] -func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { - %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> - return %0 : tensor<256x128xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> -// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 8)> -// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 8)> -// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 8 - d0 floordiv 8 + 1)> -// CHECK: func.func @CKkc_to_KC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index -// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C128]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C256]] step %[[C4]] -// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) -// CHECK-DAG: %[[OFFSET_K:.+]] = affine.apply #[[MAP1]](%[[K]]) -// CHECK-DAG: %[[IN_K_SZ:.+]] = affine.apply #[[MAP2]](%[[K]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP4]](%[[C]]) -// CHECK-DAG: %[[OFFSET_C:.+]] = affine.apply #[[MAP5]](%[[C]]) -// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP6]](%[[C]]) -// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], %[[IN_K_SZ]], 32, 8] -// CHECK:
%[[EMPTY:.+]] = tensor.empty -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] -// CHECK-SAME: into %[[EMPTY]] -// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] -// CHECK-SAME: [%[[OFFSET_K]], %[[OFFSET_C]]] [2, 4] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] -// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4] -// CHECK: scf.yield %[[RES]] -func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>) -> tensor<128x256xf32> { - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %dest : tensor<32x4x32x8xf32> -> tensor<128x256xf32> - return %0 : tensor<128x256xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 4)> -// CHECK: func.func @perfect_CKkc_to_KC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C128]] step %[[C4]] -// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP1]](%[[C]]) -// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 2, 4] -// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [2, 4] -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] -// CHECK-SAME: into %[[ITER_SLICE]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] -// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4] -// CHECK: scf.yield %[[RES]] -func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128xf32>) -> tensor<8x128xf32> { - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %dest : tensor<32x4x2x4xf32> -> tensor<8x128xf32> - return %0 : tensor<8x128xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 floordiv 2)> -// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 ceildiv 2)> -// 
CHECK: func.func @dynamic_perfect_CKkc_to_KC -// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: -// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[DIM_0:.+]] = tensor.dim %[[OUT]], %[[C0]] -// CHECK-DAG: %[[DIM_1:.+]] = tensor.dim %[[OUT]], %[[C1]] -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[DIM_0]] step %[[C2]] -// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[DIM_1]] step %[[C4]] -// CHECK-DAG: %[[OUT_K_SZ:.+]] = affine.min #[[MAP0]](%[[K]])[%[[DIM_0]]] -// CHECK-DAG: %[[OUT_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])[%[[DIM_1]]] -// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP2]](%[[K]]) -// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]]) -// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP3]](%[[OUT_C_SZ]]) -// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]] -// CHECK: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], 1, 2, 2] -// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] -// CHECK-SAME: into %[[ITER_SLICE]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] -// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]] -// CHECK: scf.yield %[[RES]] - -func.func @dynamic_perfect_CKkc_to_KC(%source: tensor<?x?x2x2xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> { - %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %dest : tensor<?x?x2x2xf32> -> tensor<?x?xf32> - return %0 : tensor<?x?xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)> -// CHECK: func.func @perfect_NKPQk_to_NPQK( -// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x4x6x6x2xf32>, -// CHECK-SAME: %{{.+}}: tensor<1x6x6x8xf32>) -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK: %{{.+}} = scf.for %[[P:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[Q:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C4]] -// CHECK: %[[K_SZ:.+]] = affine.apply #[[MAP]](%[[K]]) -// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[K_SZ]], %[[P]], %[[Q]], 0] -// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack -// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] -// CHECK-SAME: into %[[SLICE_DEST]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]] -// CHECK-SAME: into %{{.+}}[0, %[[P]], %[[Q]], %[[K]]] -// CHECK: scf.yield %[[RES]] - -func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1x6x6x8xf32>) -> tensor<1x6x6x8xf32> { - %0 =
tensor.unpack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x4x6x6x2xf32> -> tensor<1x6x6x8xf32> - return %0 : tensor<1x6x6x8xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -func.func private @get_dynamic_tile_size() -> index - -// CHECK-LABEL: func.func @fully_dynamic_unpack -// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]] -// CHECK-SAME: %[[DST:[0-9a-zA-Z]+]] -// CHECK: %[[INNER_TS:.+]] = call @get_dynamic_tile_size() : () -> index -// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[DST]]) -// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SRC]] -// CHECK: %[[EMPTY:.+]] = tensor.empty -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SLICE]] -// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [%[[INNER_TS]], %[[INNER_TS]]] into %[[EMPTY]] -func.func @fully_dynamic_unpack(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> { - %0 = func.call @get_dynamic_tile_size() : () -> index - %1 = tensor.unpack %source inner_dims_pos = [1, 0] inner_tiles = [%0, %0] into %dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32> - return %1 : tensor<?x?xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} - -// ----- - -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK: func.func @perfect_NPQK_to_NKPQk -// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x6x6x8xf32>, -// CHECK-SAME: %{{.+}}: tensor<1x4x6x6x2xf32>) -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index -// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index -// CHECK: %{{.+}} = scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[ARG4:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %{{.+}} = scf.for %[[ARG6:.+]] = %[[C0]] to %[[C6]] step %[[C1]] -// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP1]](%[[ARG2]]) -// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[ARG4]], %[[ARG6]], %[[APPLY]]] -// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] -// CHECK: %[[PACK:.+]] = tensor.pack -// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] -// CHECK-SAME: into %[[SLICE_DEST]] -// CHECK: %[[RES:.+]] = tensor.insert_slice %[[PACK]] -// CHECK-SAME: into %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] -// CHECK: scf.yield %[[RES]] - -func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4x6x6x2xf32>) -> tensor<1x4x6x6x2xf32> { - %0 = tensor.pack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles
= [2] into %dest : tensor<1x6x6x8xf32> -> tensor<1x4x6x6x2xf32> - return %0 : tensor<1x4x6x6x2xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) - transform.yield - } -} diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir index a0fd3f7d87083..bca94d4a64416 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/pack-scalable-inner-tile.mlir @@ -22,7 +22,7 @@ // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s -/// End-to-end test for tensor.pack where one of the inner tile sizes is +/// End-to-end test for linalg.pack where one of the inner tile sizes is /// scalable. func.func @main() { @@ -60,7 +60,7 @@ func.func private @pack(%A: tensor<7x16xi32>) { %A_pack_empty = tensor.empty(%c1, %tile_size) : tensor<?x16x?x1xi32> - %A_pack = tensor.pack %A + %A_pack = linalg.pack %A padding_value(%pad_val : i32) inner_dims_pos = [0, 1] inner_tiles = [%tile_size, 1] @@ -117,9 +117,9 @@ func.func private @pack(%A: tensor<7x16xi32>) { module @transforms attributes { transform.with_named_sequence } { transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) { - %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op + %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op - // 1. Tile so that we can decompose tensor.pack into tensor.pad and other + // 1. Tile so that we can decompose linalg.pack into tensor.pad and other // Ops (see step 2) %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir index 15edae8b6d3f8..a8daa0b855d00 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir @@ -8,7 +8,7 @@ // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s -/// End-to-end test for tensor.pack where one of the inner tile sizes is +/// End-to-end test for linalg.pack where one of the inner tile sizes is /// dynamic.
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
index 15edae8b6d3f8..a8daa0b855d00 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir
@@ -8,7 +8,7 @@
 
 // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s
 
-/// End-to-end test for tensor.pack where one of the inner tile sizes is
+/// End-to-end test for linalg.pack where one of the inner tile sizes is
 /// dynamic.
 
 func.func @main() {
@@ -38,7 +38,7 @@ func.func private @pack(%A: tensor<7x16xi32>) {
   %tile_size = arith.constant 8 : index
   %A_pack_empty = tensor.empty(%c1, %tile_size) : tensor<?x16x?x1xi32>
 
-  %A_pack = tensor.pack %A
+  %A_pack = linalg.pack %A
     padding_value(%pad_val : i32)
     inner_dims_pos = [0, 1]
     inner_tiles = [%tile_size, 1]
@@ -78,9 +78,9 @@ func.func private @pack(%A: tensor<7x16xi32>) {
 
 module @transforms attributes { transform.with_named_sequence } {
   transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) {
-    %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op
+    %pack = transform.structured.match ops{["linalg.pack"]} in %module : (!transform.any_op) -> !transform.any_op
 
-    // 1. Tile so that we can decompose tensor.pack into tensor.pad and other
+    // 1. Tile so that we can decompose linalg.pack into tensor.pad and other
     // Ops (see step 2)
     %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
index 63622d761bc5b..05e678227de32 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir
@@ -12,9 +12,9 @@
 /// End-to-end test for computing matrix-multiplication using linalg.mmt4d. In
 /// particular, demonstrates how the following MLIR sequence (implemented in @mmt4d):
 ///
-///   A_pack = tensor.pack A
-///   B_pack = tensor.pack B
-///   C_pack = tensor.pack C
+///   A_pack = linalg.pack A
+///   B_pack = linalg.pack B
+///   C_pack = linalg.pack C
 ///   out_pack = linalg.mmt4d(A_pack, B_pack, C_pack)
 ///
 /// is equivalent to:
@@ -86,16 +86,16 @@ func.func private @mmt4d(%A: tensor<7x16xi32>, %B: tensor<16x13xi32>, %C: tensor
   %C_pack_empty = tensor.empty() : tensor<2x2x8x8xi32>
 
   // Pack matrices
-  %A_pack = tensor.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<2x16x8x1xi32>
-  %B_pack = tensor.pack %B padding_value(%zero : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %B_pack_empty : tensor<16x13xi32> -> tensor<2x16x8x1xi32>
-  %C_pack = tensor.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
+  %A_pack = linalg.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<2x16x8x1xi32>
+  %B_pack = linalg.pack %B padding_value(%zero : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %B_pack_empty : tensor<16x13xi32> -> tensor<2x16x8x1xi32>
+  %C_pack = linalg.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
 
   // MMT4D
   %mmt4d = linalg.mmt4d ins(%A_pack, %B_pack : tensor<2x16x8x1xi32>, tensor<2x16x8x1xi32>) outs(%C_pack : tensor<2x2x8x8xi32>) -> tensor<2x2x8x8xi32>
 
   // Unpack output
   %C_out_empty = tensor.empty() : tensor<7x13xi32>
-  %C_out_unpack = tensor.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
+  %C_out_unpack = linalg.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
 
   return %C_out_unpack : tensor<7x13xi32>
 }
 
@@ -146,16 +146,16 @@ module @transforms attributes { transform.with_named_sequence } {
     transform.apply_patterns.canonicalization
   } : !transform.op<"func.func">
 
-  // Step 4. Lower tensor.pack
-  %pack = transform.structured.match ops{["tensor.pack"]} in %func_h
-    : (!transform.op<"func.func">) -> !transform.op<"tensor.pack">
-  transform.structured.lower_pack %pack : (!transform.op<"tensor.pack">)
+  // Step 4. Lower linalg.pack
+  %pack = transform.structured.match ops{["linalg.pack"]} in %func_h
+    : (!transform.op<"func.func">) -> !transform.op<"linalg.pack">
+  transform.structured.lower_pack %pack : (!transform.op<"linalg.pack">)
     -> (!transform.op<"tensor.pad">, !transform.op<"tensor.expand_shape">,
         !transform.op<"linalg.transpose">)
 
-  // Step 5. Lower tensor.unpack
-  %unpack = transform.structured.match ops{["tensor.unpack"]} in %func_h
-    : (!transform.op<"func.func">) -> !transform.op<"tensor.unpack">
-  transform.structured.lower_unpack %unpack : (!transform.op<"tensor.unpack">)
+  // Step 5. Lower linalg.unpack
+  %unpack = transform.structured.match ops{["linalg.unpack"]} in %func_h
+    : (!transform.op<"func.func">) -> !transform.op<"linalg.unpack">
+  transform.structured.lower_unpack %unpack : (!transform.op<"linalg.unpack">)
     -> (!transform.op<"tensor.empty">,
        !transform.op<"linalg.transpose">,
        !transform.op<"tensor.collapse_shape">,
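The mmt4d test above also doubles as a compact reference for the renamed ops' semantics: with a padding_value, linalg.pack pads the 7x13 accumulator out to a 2x2 grid of 8x8 tiles, and linalg.unpack drops that padding again. The same round trip in isolation (a sketch reusing the test's attributes and shapes; the function itself is hypothetical and not part of the patch):

  func.func @pack_unpack_roundtrip(%C: tensor<7x13xi32>) -> tensor<7x13xi32> {
    %zero = arith.constant 0 : i32
    // Tile into 8x8 inner tiles, filling the remainder with %zero.
    %C_pack_empty = tensor.empty() : tensor<2x2x8x8xi32>
    %C_pack = linalg.pack %C padding_value(%zero : i32)
        outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8]
        into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32>
    // Unpack restores the original 7x13 shape, discarding the padding.
    %C_out_empty = tensor.empty() : tensor<7x13xi32>
    %C_out = linalg.unpack %C_pack
        outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8]
        into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32>
    return %C_out : tensor<7x13xi32>
  }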
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
index 4395dfe74914e..c5360ee1ec954 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir
@@ -8,7 +8,7 @@
 
 // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s
 
-/// End-to-end test for tensor.unpack where one of the inner tile sizes is
+/// End-to-end test for linalg.unpack where one of the inner tile sizes is
 /// dynamic.
 
 func.func @main() {
@@ -56,7 +56,7 @@ func.func private @unpack(%A: tensor<?x?x?x1xi32>) {
   %tile_size = arith.constant 8 : index
   %A_unpack_empty = tensor.empty() : tensor<7x3xi32>
 
-  %A_unpack = tensor.unpack %A
+  %A_unpack = linalg.unpack %A
     inner_dims_pos = [0, 1]
     inner_tiles = [%tile_size, 1]
     into %A_unpack_empty : tensor<?x?x?x1xi32> -> tensor<7x3xi32>
@@ -78,9 +78,9 @@ func.func private @unpack(%A: tensor<?x?x?x1xi32>) {
 
 module @transforms attributes { transform.with_named_sequence } {
   transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) {
-    %pack = transform.structured.match ops{["tensor.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
+    %pack = transform.structured.match ops{["linalg.unpack"]} in %module : (!transform.any_op) -> !transform.any_op
 
-    // 1. Tile so that we can decompose tensor.pack
+    // 1. Tile so that we can decompose linalg.pack
     // Ops (see step 2)
     %c8 = transform.param.constant 8 : i64 -> !transform.param<i64>
     %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [%c8, 1]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
index 2d35be403ef99..8ce05d94c4ad0 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
@@ -211,7 +211,7 @@ module {
       linalg.yield %7, %8 : f32, f32
     } -> (tensor<64x64xf32>, tensor<64x64xf32>)
     %5 = tensor.empty() : tensor<2048xf32>
-    %unpack = tensor.unpack %0#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %5 : tensor<64x32xf32> -> tensor<2048xf32>
+    %unpack = linalg.unpack %0#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %5 : tensor<64x32xf32> -> tensor<2048xf32>
     return %4#1, %unpack : tensor<64x64xf32>, tensor<2048xf32>
   }
 }
@@ -254,7 +254,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK: tensor.parallel_insert_slice %[[ELEM_OUT]]#1 into %[[ELEM_OUT_ARG_1]][%[[IV1]], %[[IV2]]] [32, 32] [1, 1]
 // CHECK: }
 // CHECK: }
-// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[FINAL_RESULT]]#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %{{.*}} : tensor<64x32xf32> -> tensor<2048xf32>
+// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[FINAL_RESULT]]#0 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %{{.*}} : tensor<64x32xf32> -> tensor<2048xf32>
 // CHECK: return %[[FINAL_RESULT]]#3, %[[UNPACK]] :
 
 // -----
@@ -278,7 +278,7 @@ module {
     }
   }
   %output = tensor.empty() : tensor<2048xf32>
-  %unpack = tensor.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2048xf32>
+  %unpack = linalg.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2048xf32>
   return %unpack : tensor<2048xf32>
 }
 }
@@ -308,7 +308,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-DAG: %[[UNPACK_RESULT_OFFSET:.*]] = affine.apply #[[UNPACK_RESULT_OFFSET_MAP]](%[[IV1]])
 // CHECK-DAG: %[[UNPACK_RESULT_SIZE:.*]] = affine.min #[[UNPACK_RESULT_SIZE_MAP]](%[[IV1]])
 // CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [%[[UNPACK_RESULT_SIZE]]] [1]
-// CHECK: %[[TILED_UNPACK_OUT:.*]] = tensor.unpack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_UNPACK_OUT:.*]] = linalg.unpack %[[GENERIC_OUT]]
 // CHECK-SAME: outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32]
 // CHECK-SAME: into %[[TILED_UNPACK_DEST]]
 // CHECK: scf.forall.in_parallel {
@@ -339,7 +339,7 @@ module {
     }
   }
   %output = tensor.empty() : tensor<2047xf32>
-  %unpack = tensor.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2047xf32>
+  %unpack = linalg.unpack %1 outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32] into %output : tensor<64x32xf32> -> tensor<2047xf32>
   return %unpack : tensor<2047xf32>
 }
 }
@@ -369,7 +369,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-DAG: %[[UNPACK_RESULT_OFFSET:.*]] = affine.apply #[[UNPACK_RESULT_OFFSET_MAP]](%[[IV1]])
 // CHECK-DAG: %[[UNPACK_RESULT_SIZE:.*]] = affine.min #[[UNPACK_RESULT_SIZE_MAP]](%[[IV1]])
 // CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[UNPACK_OUT_ARG]][%[[UNPACK_RESULT_OFFSET]]] [%[[UNPACK_RESULT_SIZE]]] [1]
-// CHECK: %[[TILED_UNPACK_OUT:.*]] = tensor.unpack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_UNPACK_OUT:.*]] = linalg.unpack %[[GENERIC_OUT]]
 // CHECK-SAME: outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [32]
 // CHECK-SAME: into %[[TILED_UNPACK_DEST]]
 // CHECK: scf.forall.in_parallel {
@@ -400,7 +400,7 @@ module {
    }
  }
   %output = tensor.empty() : tensor<4x32x16xf32>
-  %pack = tensor.pack %1 inner_dims_pos = [0] inner_tiles = [16] into %output : tensor<64x32xf32> -> tensor<4x32x16xf32>
+  %pack = linalg.pack %1 inner_dims_pos = [0] inner_tiles = [16] into %output : tensor<64x32xf32> -> tensor<4x32x16xf32>
   return %pack : tensor<4x32x16xf32>
 }
 }
@@ -428,7 +428,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME: outs(%[[GENERIC_OUT_SLICE]] :
 // CHECK: %[[PACK_RESULT_OFFSET:.*]] = affine.apply #[[PACK_RESULT_MAP]](%[[IV1]])
 // CHECK: %[[TILED_PACK_DEST:.*]] = tensor.extract_slice %[[PACK_OUT_ARG]][%[[PACK_RESULT_OFFSET]], %[[IV2]], 0] [2, 32, 16] [1, 1, 1]
-// CHECK: %[[TILED_PACK_OUT:.*]] = tensor.pack %[[GENERIC_OUT]]
+// CHECK: %[[TILED_PACK_OUT:.*]] = linalg.pack %[[GENERIC_OUT]]
 // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16]
 // CHECK-SAME: into %[[TILED_PACK_DEST]]
 // CHECK: scf.forall.in_parallel {
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
index 5f7663af773a4..bc27840fdf5e9 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -591,7 +591,7 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
 func.func @imperfect_unpack_producer_fusion(%source: tensor<1x1x288x8x4xf32>, %dest: tensor<1x2x1152xf32>) -> tensor<1x2x1152xf32> {
-  %0 = tensor.unpack %source
+  %0 = linalg.unpack %source
       outer_dims_perm = [0, 1, 2]
       inner_dims_pos = [1, 2]
       inner_tiles = [8, 4]
       into %dest
@@ -625,7 +625,7 @@ module attributes {transform.with_named_sequence} {
 // CHECK-SAME: %[[ARG1:.+]]: tensor<1x2x1152xf32>
 // CHECK: %[[FOR_RESULT:.+]] = scf.for{{.*}}iter_args(%[[ITER_ARG:.+]] = {{.*}})
 // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
-// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SLICE]]
+// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SLICE]]
 // CHECK-DAG: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
 // CHECK-DAG: %[[INIT_SLICE:.+]] = tensor.extract_slice %[[ITER_ARG]]
 // CHECK: %[[GENERIC:.+]] = linalg.generic
diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir
index 5133c14414c97..c1604e226a334 100644
--- a/mlir/test/Transforms/loop-invariant-code-motion.mlir
+++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir
@@ -1163,18 +1163,18 @@ func.func @speculate_ceildivsi_range(
 
 func.func @speculate_static_pack_and_unpack(%source: tensor<128x256xf32>,
     %dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) {
-  // CHECK: tensor.pack
+  // CHECK: linalg.pack
   // CHECK-NEXT: scf.for
   scf.for %i = %lb to %ub step %step {
-    %packed = tensor.pack %source
+    %packed = linalg.pack %source
       inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest
       : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
   }
 
-  // CHECK: tensor.unpack
+  // CHECK: linalg.unpack
   // CHECK-NEXT: scf.for
   scf.for %i = %lb to %ub step %step {
-    %unpacked = tensor.unpack %dest
+    %unpacked = linalg.unpack %dest
       inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source
       : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
   }
@@ -1188,25 +1188,25 @@ func.func @speculate_dynamic_pack_and_unpack(%source: tensor<?x?xf32>,
     %tile_m: index, %tile_n: index, %pad: f32) {
 
   // CHECK: scf.for
-  // CHECK-NEXT: tensor.pack
+  // CHECK-NEXT: linalg.pack
   scf.for %i = %lb to %ub step %step {
-    %packed = tensor.pack %source
+    %packed = linalg.pack %source
      inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest
      : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
  }
 
   // CHECK: scf.for
-  // CHECK-NEXT: tensor.unpack
+  // CHECK-NEXT: linalg.unpack
   scf.for %i = %lb to %ub step %step {
-    %unpacked = tensor.unpack %dest
+    %unpacked = linalg.unpack %dest
      inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %source
      : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
  }
 
-  // CHECK: tensor.pack
+  // CHECK: linalg.pack
   // CHECK-NEXT: scf.for
   scf.for %i = %lb to %ub step %step {
-    %packed = tensor.pack %source padding_value(%pad : f32)
+    %packed = linalg.pack %source padding_value(%pad : f32)
      inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest
      : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
  }
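The expectations in this LICM test follow from how speculatability is defined for the relayout ops: a fully static linalg.pack/linalg.unpack cannot fail at runtime and may be hoisted, a pack with dynamic inner tiles may only be hoisted when a padding_value makes the result well defined for any tile size (the third loop), and the dynamic unpack in between stays inside its loop. A minimal sketch of the hoistable static case (hypothetical IR mirroring the test; after LICM the pack is expected to sit in front of the loop):

  func.func @hoistable_static_pack(%source: tensor<128x256xf32>,
      %dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) {
    // Loop-invariant and always valid: LICM may move this pack above the loop.
    scf.for %i = %lb to %ub step %step {
      %packed = linalg.pack %source
        inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest
        : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
    }
    return
  }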
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index fa2a27dcfa991..046b9a65f3359 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -74,8 +74,9 @@ struct TestLinalgTransforms
       *this, "test-decompose-pad-tensor",
       llvm::cl::desc("Test transform pad tensor by copying with generic ops"),
       llvm::cl::init(false)};
+  // TODO: This is not used - delete.
   Option<bool> testDecomposeTensorPackOp{
-      *this, "test-decompose-tensor-pack",
+      *this, "test-decompose-linalg-pack",
       llvm::cl::desc("Test transform that generalizes pack ops into a sequence "
                      "of tensor and Linalg ops"),
       llvm::cl::init(false)};
@@ -130,6 +131,14 @@ struct TestLinalgTransforms
   Option<bool> testDecomposeWinogradOps{
       *this, "test-decompose-winograd-ops",
       llvm::cl::desc("Test decompose Winograd ops"), llvm::cl::init(false)};
+  Option<bool> testFoldIntoPackAndUnpack{
+      *this, "test-fold-into-pack-and-unpack",
+      llvm::cl::desc("Test folding ops into linalg.pack and linalg.unpack"),
+      llvm::cl::init(false)};
+  Option<bool> testSimplifyPackUnpackPatterns{
+      *this, "test-simplify-pack-unpack-patterns",
+      llvm::cl::desc("Test patterns to simplify linalg.pack and linalg.unpack"),
+      llvm::cl::init(false)};
 };
 } // namespace
 
@@ -227,6 +236,18 @@ static void applyDecomposeWinogradOps(func::FuncOp funcOp) {
   (void)applyPatternsGreedily(funcOp, std::move(patterns));
 }
 
+static void applyFoldIntoPackAndUnpackPatterns(Operation *rootOp) {
+  RewritePatternSet patterns(rootOp->getContext());
+  linalg::populateFoldIntoPackAndUnpackPatterns(patterns);
+  (void)applyPatternsGreedily(rootOp, std::move(patterns));
+}
+
+static void applySimplifyPackUnpackPatterns(Operation *rootOp) {
+  RewritePatternSet patterns(rootOp->getContext());
+  linalg::populateSimplifyPackAndUnpackPatterns(patterns);
+  (void)applyPatternsGreedily(rootOp, std::move(patterns));
+}
+
 /// Apply transformations specified as patterns.
 void TestLinalgTransforms::runOnOperation() {
   if (testPatterns)
@@ -255,6 +276,11 @@ void TestLinalgTransforms::runOnOperation() {
     return applyWinogradConv2D(getOperation());
   if (testDecomposeWinogradOps)
     return applyDecomposeWinogradOps(getOperation());
+  Operation *rootOp = getOperation();
+  if (testFoldIntoPackAndUnpack)
+    applyFoldIntoPackAndUnpackPatterns(rootOp);
+  if (testSimplifyPackUnpackPatterns)
+    applySimplifyPackUnpackPatterns(rootOp);
 }
 
 namespace mlir {
diff --git a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
index 173bfd8955f2b..e435130c2a417 100644
--- a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp
@@ -77,11 +77,6 @@ struct TestTensorTransforms
       llvm::cl::desc("Test folding of expand_shape/collapse_shape"),
       llvm::cl::init(false)};
 
-  Option<bool> testFoldIntoPackAndUnpack{
-      *this, "test-fold-into-pack-and-unpack",
-      llvm::cl::desc("Test folding ops into tensor.pack and tensor.unpack"),
-      llvm::cl::init(false)};
-
   Option<bool> useForeach{
       *this, "use-foreach",
       llvm::cl::desc(
@@ -89,11 +84,6 @@ struct TestTensorTransforms
           "Use the scf.forall operation when generating loop nests for "
           "the extract_slice of collapse_shape pattern"),
       llvm::cl::init(false)};
 
-  Option<bool> testSimplifyPackUnpackPatterns{
-      *this, "test-simplify-pack-unpack-patterns",
-      llvm::cl::desc("Test patterns to simplify tensor.pack and tensor.unpack"),
-      llvm::cl::init(false)};
-
   Option<bool> testTrackingListener{
       *this, "test-tracking-listener",
       llvm::cl::desc("Test tensor TrackingListener for the transform dialect"),
@@ -113,12 +103,6 @@ static void applyBubbleUpExpandShapePatterns(Operation *rootOp) {
   (void)applyPatternsGreedily(rootOp, std::move(patterns));
 }
 
-static void applyFoldIntoPackAndUnpackPatterns(Operation *rootOp) {
-  RewritePatternSet patterns(rootOp->getContext());
-  tensor::populateFoldIntoPackAndUnpackPatterns(patterns);
-  (void)applyPatternsGreedily(rootOp, std::move(patterns));
-}
-
 static void applyFoldConstantExtractSlicePatterns(Operation *rootOp) {
   RewritePatternSet patterns(rootOp->getContext());
   tensor::ControlConstantExtractSliceFusionFn controlFn =
@@ -148,12 +132,6 @@ applyDropRedundantInsertSliceRankExpansionPatterns(Operation *rootOp) {
   (void)applyPatternsGreedily(rootOp, std::move(patterns));
 }
 
-static void applySimplifyPackUnpackPatterns(Operation *rootOp) {
-  RewritePatternSet patterns(rootOp->getContext());
-  tensor::populateSimplifyPackAndUnpackPatterns(patterns);
-  (void)applyPatternsGreedily(rootOp, std::move(patterns));
-}
-
 namespace {
 /// Base pattern to rewrite a `tensor.collapse_shape -> tensor.extract_slice`.
 /// The `tensor.extract_slice` is replaced by a loop or gather operation that
@@ -387,8 +365,6 @@ static LogicalResult testTrackingListenerReplacements(Operation *rootOp) {
 
 void TestTensorTransforms::runOnOperation() {
   Operation *rootOp = getOperation();
-  if (testSimplifyPackUnpackPatterns)
-    applySimplifyPackUnpackPatterns(rootOp);
   if (testFoldConstantExtractSlice)
     applyFoldConstantExtractSlicePatterns(rootOp);
   if (testFoldConsecutiveInsertExtractSlice)
@@ -399,8 +375,6 @@ void TestTensorTransforms::runOnOperation() {
     applyReassociativeReshapeFoldingPatterns(rootOp);
   if (testBubbleUpExpandShapePatterns)
     applyBubbleUpExpandShapePatterns(rootOp);
-  if (testFoldIntoPackAndUnpack)
-    applyFoldIntoPackAndUnpackPatterns(rootOp);
   if (testRewriteExtractSliceWithTiledCollapseShape) {
     if (failed(
             applyRewriteExtractFromCollapseShapePatterns(rootOp, useForeach)))
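Beyond renaming the test flags, the last two files record that the fold/simplify pattern sets now live under linalg:: rather than tensor::. One rewrite the moved populateFoldIntoPackAndUnpackPatterns set is expected to perform is absorbing a rank-preserving tensor.extract_slice into the unpack that feeds it (a hypothetical input for illustration; the function name and shapes are made up):

  func.func @fold_extract_slice_into_unpack(
      %src: tensor<28x2x16xf32>, %dest: tensor<28x32xf32>) -> tensor<28x28xf32> {
    %unpack = linalg.unpack %src
        outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [16]
        into %dest : tensor<28x2x16xf32> -> tensor<28x32xf32>
    // The slice is the unpack's only use and keeps the rank, so the pattern
    // is expected to fold it away by unpacking directly into a 28x28 dest.
    %slice = tensor.extract_slice %unpack[0, 0] [28, 28] [1, 1]
        : tensor<28x32xf32> to tensor<28x28xf32>
    return %slice : tensor<28x28xf32>
  }

The relocated -test-fold-into-pack-and-unpack and -test-simplify-pack-unpack-patterns options above exist to exercise exactly these pattern sets from mlir-opt.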