From 3c11b025cf2d981743b7b00900b2ac5888568a62 Mon Sep 17 00:00:00 2001 From: Quinn Dawkins Date: Fri, 14 Feb 2025 11:34:46 -0500 Subject: [PATCH] Changes necessary for https://github.com/llvm/llvm-project/pull/123902 --- .../materialize_homogeneous_encodings.mlir | 4 +- .../Codegen/Common/CPU/CPULowerToUKernels.cpp | 8 +- .../Codegen/Common/CPU/CPUPrepareUkernels.cpp | 24 ++--- .../Common/CPU/test/lower_to_ukernel_ops.mlir | 24 ++--- .../Common/CPU/test/prepare_ukernels.mlir | 20 ++-- .../ConvertToDestinationPassingStylePass.cpp | 6 +- .../Codegen/Common/DecomposePackUnPackOps.cpp | 40 ++++---- .../compiler/Codegen/Common/EncodingUtils.cpp | 2 +- .../GPU/test/gpu_fuse_and_hoist_forall.mlir | 4 +- .../GPU/test/gpu_pack_to_instrinsics.mlir | 6 +- .../Codegen/Common/GenericVectorization.cpp | 4 +- .../Codegen/Common/MaterializeEncoding.cpp | 3 +- .../Common/MaterializeEncodingPatterns.cpp | 10 +- .../Codegen/Common/TileInferenceUtils.cpp | 18 ++-- .../convert_to_destination_passing_style.mlir | 12 +-- .../decompose_boundary_pack_unpack_ops.mlir | 48 +++++----- .../test/decompose_pack_unpack_ops.mlir | 28 +++--- .../Common/test/generic_vectorization.mlir | 18 ++-- .../test/gpu_materialize_encoding_gfx942.mlir | 24 ++--- .../test/iree_comprehensive_bufferize.mlir | 6 +- .../test/llvmcpu_materialize_encoding.mlir | 94 +++++++++---------- .../tile_and_distribute_to_workgroups.mlir | 26 ++--- .../test/vmvx_materialize_encoding.mlir | 4 +- .../Dialect/Codegen/IR/IREECodegenTypes.h | 2 +- .../Interfaces/BufferizationInterfaces.cpp | 17 ++-- .../PartitionableLoopsInterface.cpp | 8 +- .../Codegen/LLVMCPU/KernelDispatch.cpp | 28 +++--- .../compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp | 4 +- .../compiler/Codegen/LLVMCPU/test/peel.mlir | 6 +- .../test/pipeline_pack_unpack_tests.mlir | 6 +- .../Codegen/LLVMCPU/test/pipeline_tests.mlir | 2 +- .../select_aarch64_lowering_strategy.mlir | 12 +-- .../test/select_x86_64_lowering_strategy.mlir | 42 ++++----- .../tile-root-fuse-consumer-producer.mlir | 4 +- .../compiler/Codegen/LLVMGPU/KernelConfig.cpp | 12 +-- .../Codegen/LLVMGPU/LLVMGPUTensorPad.cpp | 12 +-- .../iree/compiler/Codegen/LLVMGPU/Passes.cpp | 4 +- .../test/ROCDL/config_tile_and_fuse.mlir | 10 +- .../LLVMGPU/test/gpu_set_num_workgroups.mlir | 4 +- .../LLVMGPU/test/pack_pipeline_test.mlir | 2 +- .../Codegen/LLVMGPU/test/tensor_pad.mlir | 4 +- .../iree/compiler/Codegen/Utils/CPUUtils.cpp | 4 +- .../src/iree/compiler/Codegen/Utils/Utils.cpp | 8 +- .../src/iree/compiler/Codegen/Utils/Utils.h | 8 +- .../VMVX/test/select_lowering_strategy.mlir | 6 +- .../Flow/Transforms/AnnotateDispatches.cpp | 12 +-- .../Transforms/InitializeEmptyTensors.cpp | 2 +- .../Transforms/test/annotate_dispatches.mlir | 8 +- .../DispatchCreation/FormDispatchRegions.cpp | 20 ++-- .../DispatchCreation/FormScalarDispatches.cpp | 2 +- .../DispatchCreation/FusionPreprocessing.cpp | 2 +- .../DispatchCreation/SinkReshapes.cpp | 2 +- .../TensorPadToTensorInsertSlice.cpp | 2 +- .../test/collapse_dimensions.mlir | 6 +- ...spatch_region_formation_preprocessing.mlir | 4 +- .../test/form_dispatch_regions.mlir | 24 ++--- .../ExternalInterfaces/UtilExternalModels.cpp | 8 +- .../DataLayoutPropagation.cpp | 4 +- .../GlobalLoopInvariantCodeMotion.cpp | 3 +- .../GlobalOptimization/SimplifyPackUnpack.cpp | 4 +- .../test/data_layout_propagation.mlir | 8 +- .../global_loop_invariant_code_motion.mlir | 32 +++---- .../test/hoist_into_globals.mlir | 2 +- .../Common/ConvertConvToChannelsLast.cpp | 36 +++---- .../Common/test/conv_to_channels_last.mlir | 14 +-- 
.../docs/community/blog/posts/microkernels.md | 8 +- ...ranspose_1x9_into_2x4x8x4_issue_12546.mlir | 2 +- tests/e2e/tensor_ops/pack.mlir | 20 ++-- .../tensor_ops/pack_dynamic_inner_tiles.mlir | 16 ++-- tests/e2e/tensor_ops/pack_i8.mlir | 8 +- tests/e2e/tensor_ops/unpack.mlir | 34 +++---- 71 files changed, 463 insertions(+), 458 deletions(-) diff --git a/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir b/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir index 535ecd75b501..c7bd654921de 100644 --- a/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir +++ b/compiler/plugins/target/LLVMCPU/test/materialize_homogeneous_encodings.mlir @@ -19,5 +19,5 @@ module attributes {hal.device.targets = [#device_target_llvm_cpu]} { } } // CHECK-LABEL: util.func public @lhs_encoding -// CHECK: tensor.pack -// CHECK: tensor.unpack +// CHECK: linalg.pack +// CHECK: linalg.unpack diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp index 8485d70edecd..889dbe64de5b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPULowerToUKernels.cpp @@ -263,7 +263,7 @@ matchDAGForUKernel(RewriterBase &rewriter, linalg::Mmt4DOp op, } static FailureOr -matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op, +matchDAGForUKernel(RewriterBase &rewriter, linalg::PackOp op, bool /*skipIntermediateRoundings*/) { auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op); const char ukernelName[] = "pack"; @@ -386,7 +386,7 @@ matchDAGForUKernel(RewriterBase &rewriter, tensor::PackOp op, } static FailureOr -matchDAGForUKernel(RewriterBase &rewriter, tensor::UnPackOp op, +matchDAGForUKernel(RewriterBase &rewriter, linalg::UnPackOp op, bool /*skipIntermediateRoundings*/) { auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op); const char ukernelName[] = "unpack"; @@ -616,8 +616,8 @@ void CPULowerToUKernelsPass::runOnOperation() { // these ops. auto allTargets = [](auto target) { return true; }; patterns.insert, - LowerToUKernelPattern, - LowerToUKernelPattern>( + LowerToUKernelPattern, + LowerToUKernelPattern>( context, allTargets, skipIntermediateRoundings); // These patterns are inherently specific to the VMVX backend. 
patterns.insert>( diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp index 4c0d3e7b7d73..3e547f4ee5c5 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUPrepareUkernels.cpp @@ -46,7 +46,7 @@ static void tileBatchDimsForBatchMmt4dOp(RewriterBase &rewriter, static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter, FunctionOpInterface funcOp) { - funcOp.walk([&](tensor::PackOp packOp) { + funcOp.walk([&](linalg::PackOp packOp) { if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) { return; } @@ -81,7 +81,7 @@ static void tileNonPackedDimsFor3DPackOps(RewriterBase &rewriter, static void tileNonPackedDimsFor5DPUnpackOps(RewriterBase &rewriter, FunctionOpInterface funcOp) { - funcOp.walk([&](tensor::UnPackOp unpackOp) { + funcOp.walk([&](linalg::UnPackOp unpackOp) { if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) { return; } @@ -251,10 +251,10 @@ struct ConvertBatchMmt4DtoMmt4DPattern } }; -struct Convert3DPackto2DPackPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct Convert3DPackto2DPackPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { if (packOp.getSourceRank() != 3 || packOp.getDestRank() != 5) { return failure(); @@ -309,7 +309,7 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern { auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, packOp.getDest(), reducedDestType); - auto newPackOp = rewriter.create( + auto newPackOp = rewriter.create( loc, reducedSrc, reducedDest, newInnerDimsPos, packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPerm); @@ -321,10 +321,10 @@ struct Convert3DPackto2DPackPattern : public OpRewritePattern { }; struct Convert5DUnPackto4DUnPackPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, + LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override { if (unpackOp.getSourceRank() != 5 || unpackOp.getDestRank() != 3) { return failure(); @@ -387,7 +387,7 @@ struct Convert5DUnPackto4DUnPackPattern auto reducedDest = tensor::createCanonicalRankReducingExtractSliceOp( rewriter, loc, unpackOp.getDest(), reducedDestType); - auto newUnpackOp = rewriter.create( + auto newUnpackOp = rewriter.create( loc, reducedSrc, reducedDest, newInnerDimsPos, unpackOp.getMixedTiles(), newOuterDimsPerm); @@ -436,8 +436,8 @@ void CPUPrepareUkernelsPass::runOnOperation() { tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx); tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx); tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx); - tensor::PackOp::getCanonicalizationPatterns(patterns, ctx); - tensor::UnPackOp::getCanonicalizationPatterns(patterns, ctx); + linalg::PackOp::getCanonicalizationPatterns(patterns, ctx); + linalg::UnPackOp::getCanonicalizationPatterns(patterns, ctx); tensor::CastOp::getCanonicalizationPatterns(patterns, ctx); tensor::populateFoldTensorEmptyPatterns(patterns); if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) { diff --git 
a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir index d5d4001bef9b..c58fbdaecfa6 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/lower_to_ukernel_ops.mlir @@ -287,7 +287,7 @@ func.func @mmt4d_bf16bf16f32(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor, %arg2 : i8) -> tensor attributes { hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "all", target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -315,7 +315,7 @@ func.func @pack_i8i8_x86(%arg0 : tensor, %arg1 : tensor, %ar func.func @pack_i8i8(%arg0 : tensor, %arg1 : tensor, %arg2 : i8) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : i8) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -344,7 +344,7 @@ func.func @pack_i8i8(%arg0 : tensor, %arg1 : tensor, %arg2 : func.func @pack_f16f16(%arg0 : tensor, %arg1 : tensor, %arg2 : f16) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : f16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -373,7 +373,7 @@ func.func @pack_f16f16(%arg0 : tensor, %arg1 : tensor, %ar func.func @pack_bf16bf16(%arg0 : tensor, %arg1 : tensor, %arg2 : bf16) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : bf16) inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -401,7 +401,7 @@ func.func @pack_bf16bf16(%arg0 : tensor, %arg1 : tensor, func.func @pack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : tensor, %arg2 : i32) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : i32) inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -430,19 +430,19 @@ func.func @pack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor, %arg2 : f32) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.pack %arg0 padding_value(%arg2 : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.pack %arg0 padding_value(%arg2 : f32) 
outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } // ----- -// Check that tensor.pack is not lowered to a microkernel by default - it should +// Check that linalg.pack is not lowered to a microkernel by default - it should // only be on VMVX. // CHECK: func @unpack_f16f16_default -// CHECK: tensor.unpack +// CHECK: linalg.unpack func.func @unpack_f16f16_default(%arg0 : tensor, %arg1 : tensor) -> tensor { - %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -468,7 +468,7 @@ func.func @unpack_f16f16_default(%arg0 : tensor, %arg1 : tensor, %arg1 : tensor) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -494,7 +494,7 @@ func.func @unpack_f16f16(%arg0 : tensor, %arg1 : tensor) - func.func @unpack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : tensor) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } @@ -520,7 +520,7 @@ func.func @unpack_i32i32_transpose_inner(%arg0 : tensor, %arg1 : te func.func @unpack_f32f32_transpose_inner_and_outer(%arg0 : tensor, %arg1 : tensor) -> tensor attributes { hal.executable.target = #hal.executable.target<"vmvx", "vmvx-bytecode-fb", {ukernels = "all"}> } { - %result = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 + %result = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [7, 8] into %arg1 : tensor -> tensor func.return %result : tensor } diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir b/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir index 199596d99833..7596d7b96517 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/prepare_ukernels.mlir @@ -158,7 +158,7 @@ func.func @pack_without_outer_dims_perm(%arg0: tensor<1x16384x512xbf16>, %arg1: hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "pack", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %cst = arith.constant 0.000000e+00 : bf16 - %pack = tensor.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> + %pack = linalg.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> return %pack : tensor<1x1024x256x16x2xbf16> } // CHECK: func.func @pack_without_outer_dims_perm @@ -168,7 +168,7 @@ func.func @pack_without_outer_dims_perm(%arg0: tensor<1x16384x512xbf16>, %arg1: // CHECK-SAME: tensor<1x16384x512xbf16> to tensor<16384x512xbf16> // CHECK: %[[DEST_SLICE:.+]] = tensor.extract_slice %[[DEST]] // CHECK-SAME: tensor<1x1024x256x16x2xbf16> to 
tensor<1024x256x16x2xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC_SLICE]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC_SLICE]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [16, 2] // CHECK-SAME: into %[[DEST_SLICE]] @@ -178,7 +178,7 @@ func.func @pack_with_outer_dims_perm(%arg0: tensor<484x16x64xbf16>, %arg1: tenso hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "pack", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %cst = arith.constant 0.000000e+00 : bf16 - %pack = tensor.pack %arg0 padding_value(%cst : bf16) outer_dims_perm = [2, 0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<484x16x64xbf16> -> tensor<64x31x8x16x2xbf16> + %pack = linalg.pack %arg0 padding_value(%cst : bf16) outer_dims_perm = [2, 0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<484x16x64xbf16> -> tensor<64x31x8x16x2xbf16> return %pack : tensor<64x31x8x16x2xbf16> } // CHECK: func.func @pack_with_outer_dims_perm @@ -190,7 +190,7 @@ func.func @pack_with_outer_dims_perm(%arg0: tensor<484x16x64xbf16>, %arg1: tenso // CHECK-SAME: tensor<484x16x64xbf16> to tensor<484x16xbf16> // CHECK: %[[DEST_SLICE:.+]] = tensor.extract_slice %[[ITER]] // CHECK-SAME: tensor<64x31x8x16x2xbf16> to tensor<31x8x16x2xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC_SLICE]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC_SLICE]] // CHECK-SAME: padding_value(%[[PAD_VAL]] : bf16) // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] // CHECK-SAME: into %[[DEST_SLICE]] @@ -202,11 +202,11 @@ func.func @do_not_decompose_pack(%arg0: tensor<1x16384x512xbf16>, %arg1: tensor< hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %cst = arith.constant 0.000000e+00 : bf16 - %pack = tensor.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> + %pack = linalg.pack %arg0 inner_dims_pos = [1, 2] inner_tiles = [16, 2] into %arg1 : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> return %pack : tensor<1x1024x256x16x2xbf16> } // CHECK-LABEL: func.func @do_not_decompose_pack -// CHECK: tensor.pack {{.+}} : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> +// CHECK: linalg.pack {{.+}} : tensor<1x16384x512xbf16> -> tensor<1x1024x256x16x2xbf16> // ----- @@ -214,7 +214,7 @@ func.func @unpack_without_transpose(%arg0: tensor<1828x8x64x16x16xf32>) -> tenso hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {ukernels = "unpack", target_triple="x86_64-xyz-xyz", cpu_features=""}> } { %6 = tensor.empty() : tensor<1828x128x1024xf32> - %unpack = tensor.unpack %arg0 + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 16] @@ -233,7 +233,7 @@ func.func @unpack_without_transpose(%arg0: tensor<1828x8x64x16x16xf32>) -> tenso // CHECK-SAME: : tensor<1828x8x64x16x16xf32> to tensor<8x64x16x16xf32> // CHECK: %[[DEST_SLICE:.*]] = tensor.extract_slice %[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 1024] [1, 1, 1] // CHECK-SAME: : tensor<1828x128x1024xf32> to tensor<128x1024xf32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[SRC_SLICE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[SRC_SLICE]] // CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] // CHECK-SAME: into %[[DEST_SLICE]] : tensor<8x64x16x16xf32> -> tensor<128x1024xf32> // CHECK: %[[NEW_ITER_ARG:.*]] = tensor.insert_slice %[[UNPACK]] into 
%[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 1024] [1, 1, 1] @@ -250,7 +250,7 @@ func.func @unpack_outer_dim_transpose(%arg0: tensor<4x8x29241x16x16xf32>) -> ten } { %cst = arith.constant 0.000000e+00 : bf16 %4 = tensor.empty() : tensor<29241x128x64xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [2, 1, 0] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %4 : tensor<4x8x29241x16x16xf32> -> tensor<29241x128x64xf32> + %unpack = linalg.unpack %arg0 outer_dims_perm = [2, 1, 0] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %4 : tensor<4x8x29241x16x16xf32> -> tensor<29241x128x64xf32> return %unpack : tensor<29241x128x64xf32> } // CHECK-LABEL: func.func @unpack_outer_dim_transpose( @@ -265,7 +265,7 @@ func.func @unpack_outer_dim_transpose(%arg0: tensor<4x8x29241x16x16xf32>) -> ten // CHECK-SAME: : tensor<4x8x29241x16x16xf32> to tensor<4x8x16x16xf32> // CHECK: %[[DEST_SLICE:.*]] = tensor.extract_slice %[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 64] [1, 1, 1] // CHECK-SAME: : tensor<29241x128x64xf32> to tensor<128x64xf32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[SRC_SLICE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[SRC_SLICE]] // CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] // CHECK-SAME: into %[[DEST_SLICE]] : tensor<4x8x16x16xf32> -> tensor<128x64xf32> // CHECK: %[[NEW_ITER_ARG:.*]] = tensor.insert_slice %[[UNPACK]] into %[[ITER_ARG]][%[[ITER]], 0, 0] [1, 128, 64] [1, 1, 1] diff --git a/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp b/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp index 4385ed1307dc..26b1d4298c1a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/ConvertToDestinationPassingStylePass.cpp @@ -463,13 +463,13 @@ static LogicalResult adaptComputeConsumerToAvoidStackAllocation( } /// Replaces a tensor.empty op with bufferization.alloc_tensor op which is -/// created by tiling tensor.unpack op. It is intended because tiling unpack ops +/// created by tiling linalg.unpack op. It is intended because tiling unpack ops /// with non-perfect sizes needs extra elements. See the tiling implementation -/// of tensor.unpack op for more details. +/// of linalg.unpack op for more details. static LogicalResult replaceUnpackEmptyWithAllocTensor(OpBuilder &b, mlir::FunctionOpInterface funcOp) { - funcOp.walk([&](tensor::UnPackOp unpackOp) { + funcOp.walk([&](linalg::UnPackOp unpackOp) { if (!unpackOp->hasOneUse() || !isa(*(unpackOp->user_begin()))) { return; diff --git a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp index 7cdd7052e2a4..3e9a8d7c9ac3 100644 --- a/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/DecomposePackUnPackOps.cpp @@ -42,16 +42,16 @@ namespace { // Shared rewrite patterns //===----------------------------------------------------------------------===// -/// A wrapper pattern that calls linalg::lowerPack on tensor::PackOp. It lowers -/// a tensor.pack op to tensor.pad + tensor.expand_shape + linalg.transpose ops. -struct LowerPackPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +/// A wrapper pattern that calls linalg::lowerPack on linalg::PackOp. It lowers +/// a linalg.pack op to tensor.pad + tensor.expand_shape + linalg.transpose ops. 
+struct LowerPackPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; explicit LowerPackPattern(MLIRContext *context, std::optional controlFn) : OpRewritePattern(context), controlFn(controlFn) {} - LogicalResult matchAndRewrite(tensor::PackOp op, + LogicalResult matchAndRewrite(linalg::PackOp op, PatternRewriter &rewriter) const override { if (controlFn && failed(controlFn.value()(op))) { return failure(); } @@ -69,17 +69,17 @@ struct LowerPackPattern : public OpRewritePattern { std::optional controlFn; }; -/// A wrapper pattern that calls linalg::lowerUnPack on tensor::UnPackOp. It -/// lowers a tensor.unpack op to tensor.empty + linalg.transpose + +/// A wrapper pattern that calls linalg::lowerUnPack on linalg::UnPackOp. It +/// lowers a linalg.unpack op to tensor.empty + linalg.transpose + /// tensor.collapse_shape + tensor.extract_slice ops. -struct LowerUnPackPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct LowerUnPackPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; explicit LowerUnPackPattern(MLIRContext *context, std::optional controlFn) : OpRewritePattern(context), controlFn(controlFn) {} - LogicalResult matchAndRewrite(tensor::UnPackOp op, + LogicalResult matchAndRewrite(linalg::UnPackOp op, PatternRewriter &rewriter) const override { if (controlFn && failed(controlFn.value()(op))) { return failure(); } @@ -132,7 +132,7 @@ static LogicalResult commonRunOnOperation( } } - // TODO(hanchung): Below is a fallback solution for tensor.pack/unpack + // TODO(hanchung): Below is a fallback solution for linalg.pack/unpack // decomposition. They will be retired after lowerPack and lowerUnPack handle // all the cases. @@ -142,7 +142,7 @@ static LogicalResult commonRunOnOperation( auto packOptions = scf::SCFTileAndFuseOptions().setTilingOptions( scf::SCFTilingOptions().setTileSizeComputationFunction( [](OpBuilder &builder, Operation *op) -> SmallVector { - auto packOp = cast(op); + auto packOp = cast(op); // Do nothing if any of inner tile sizes is dynamic.
if (llvm::any_of(packOp.getMixedTiles(), llvm::IsaPred)) { @@ -155,7 +155,7 @@ static LogicalResult commonRunOnOperation( return tileSizes; })); { - WalkResult status = funcOp->walk([&](tensor::PackOp op) { + WalkResult status = funcOp->walk([&](linalg::PackOp op) { if (controlFn && failed(controlFn.value()(op))) { return WalkResult::advance(); } @@ -176,7 +176,7 @@ static LogicalResult commonRunOnOperation( auto unpackTilingOptions = scf::SCFTilingOptions().setTileSizeComputationFunction( [](OpBuilder &builder, Operation *op) { - auto unpackOp = cast(op); + auto unpackOp = cast(op); int numLoops = unpackOp.getDestRank(); auto dimAndTileMapping = unpackOp.getDimAndTileMapping(); SmallVector tileSizes; @@ -190,7 +190,7 @@ static LogicalResult commonRunOnOperation( return tileSizes; }); { - WalkResult status = funcOp->walk([&](tensor::UnPackOp op) { + WalkResult status = funcOp->walk([&](linalg::UnPackOp op) { if (controlFn && failed(controlFn.value()(op))) { return WalkResult::advance(); } @@ -299,12 +299,12 @@ static bool hasPadding(Operation *op) { } return false; }; - auto packOp = dyn_cast(op); + auto packOp = dyn_cast(op); if (packOp && needsPad(packOp.getSourceType(), packOp.getInnerDimsPos(), packOp.getStaticInnerTiles())) { return true; } - auto unPackOp = dyn_cast(op); + auto unPackOp = dyn_cast(op); if (unPackOp && needsPad(unPackOp.getDestType(), unPackOp.getInnerDimsPos(), unPackOp.getStaticInnerTiles())) { return true; @@ -320,7 +320,7 @@ static bool hasPadding(Operation *op) { /// 3. If the op is an UnPackOp, then all of its consumers must be dispatch /// tensor stores. static LogicalResult isUnpaddedAndAtBoundary(Operation *op) { - if (!isa(op) && !isa(op)) { + if (!isa(op) && !isa(op)) { return failure(); } if (hasPadding(op)) { @@ -329,13 +329,13 @@ static LogicalResult isUnpaddedAndAtBoundary(Operation *op) { // If the producer is a dispatch tensor load, then the `op` is decomposable // if it is a PackOp. - if (isa(op) && + if (isa(op) && op->getOperand(0).getDefiningOp()) { return success(); } // If all consumers are dispatch tensor stores, then the `op` is decomposable // if it is an UnPackOp. 
- if (isa(op) && + if (isa(op) && llvm::all_of(op->getUsers(), [&](Operation *user) { return isa(user); })) { diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp index ee47168b853b..5a17dec7fe05 100644 --- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.cpp @@ -36,7 +36,7 @@ MaterializeEncodingTypeConverter::MaterializeEncodingTypeConverter( if (IREE::Codegen::isIdentityLayout(encodingInfo)) { return IREE::Encoding::dropEncoding(type); } - auto packedType = cast(tensor::PackOp::inferPackedType( + auto packedType = cast(linalg::PackOp::inferPackedType( type, encodingInfo.innerTileSizes, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm)); diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir index 76a30902610f..f0df8398f11e 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fuse_and_hoist_forall.mlir @@ -527,7 +527,7 @@ func.func @fuse_imperfectly_aligned_unpack(%arg0: tensor<5x31xf16>, %arg1: index %c128 = arith.constant 128 : index %c0 = arith.constant 0 : index %0 = tensor.empty() : tensor<128xf16> - %unpack = tensor.unpack %arg0 inner_dims_pos = [0] inner_tiles = [31] into %0 : tensor<5x31xf16> -> tensor<128xf16> + %unpack = linalg.unpack %arg0 inner_dims_pos = [0] inner_tiles = [31] into %0 : tensor<5x31xf16> -> tensor<128xf16> %1 = scf.forall (%arg2) in (2) shared_outs(%arg3 = %0) -> (tensor<128xf16>) { %2 = affine.apply #map(%arg2) %extracted_slice = tensor.extract_slice %unpack[%2] [64] [1] : tensor<128xf16> to tensor<64xf16> @@ -542,7 +542,7 @@ func.func @fuse_imperfectly_aligned_unpack(%arg0: tensor<5x31xf16>, %arg1: index // CHECK-LABEL: func @fuse_imperfectly_aligned_unpack // CHECK: scf.forall -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.copy // CHECK: scf.forall.in_parallel // CHECK: return diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir index a14cd543091b..a71c2e9f06f1 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_pack_to_instrinsics.mlir @@ -12,9 +12,9 @@ module { // CHECK-SAME: %[[A:[A-Za-z0-9]+]]: tensor<64x64xf16> // CHECK-SAME: %[[B:[A-Za-z0-9]+]]: tensor<64x64xf16> // CHECK-SAME: %[[C:[A-Za-z0-9]+]]: tensor<64x64xf32> -// CHECK-DAG: %[[A_PACK:.+]] = tensor.pack %[[A]] inner_dims_pos = [0, 1] inner_tiles = [32, 8] -// CHECK-DAG: %[[B_PACK:.+]] = tensor.pack %[[B]] inner_dims_pos = [1, 0] inner_tiles = [32, 8] -// CHECK-DAG: %[[C_PACK:.+]] = tensor.pack %[[C]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] +// CHECK-DAG: %[[A_PACK:.+]] = linalg.pack %[[A]] inner_dims_pos = [0, 1] inner_tiles = [32, 8] +// CHECK-DAG: %[[B_PACK:.+]] = linalg.pack %[[B]] inner_dims_pos = [1, 0] inner_tiles = [32, 8] +// CHECK-DAG: %[[C_PACK:.+]] = linalg.pack %[[C]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] // CHECK: iree_gpu.multi_mma %[[A_PACK]], %[[B_PACK]], %[[C_PACK]] // CHECK-SAME: indexing_maps = // CHECK-SAME: affine_map<(d0, d1, d2) -> (d0, d2)> diff --git a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp 
b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp index adb1798dc3e7..2caa7d4910e1 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GenericVectorization.cpp @@ -56,7 +56,7 @@ getVectorSizes(Operation *op, bool useConfiguredVectorSizes) { scalableFlags = result->vectorScalableFlags; } }) - .Case([&](auto op) { + .Case([&](auto op) { std::optional result = inferSizesFromIR(op); if (result) { vectorSizes = result->vectorSizes; @@ -122,7 +122,7 @@ void GenericVectorizationPass::runOnOperation() { isa(op)) { candidates.push_back(op); } else if (enableVectorMasking && - isa(op)) { + isa(op)) { candidates.push_back(op); } }); diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp index 9521a8f37d64..7ac4fdd13032 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp @@ -18,6 +18,7 @@ #include "iree/compiler/Dialect/HAL/Analysis/DeviceAnalysis.h" #include "iree/compiler/Dialect/HAL/IR/HALTypes.h" #include "iree/compiler/Dialect/Stream/Analysis/Affinity.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/MemRef/Transforms/Transforms.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Interfaces/FunctionInterfaces.h" @@ -107,7 +108,7 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp, { RewritePatternSet patterns(ctx); tensor::CastOp::getCanonicalizationPatterns(patterns, ctx); - tensor::populateFoldIntoPackAndUnpackPatterns(patterns); + linalg::populateFoldIntoPackAndUnpackPatterns(patterns); memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns); if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) { funcOp.emitOpError("folding patterns failed"); diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp index 8602a23ea767..a6ea6deb6e5a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp @@ -137,13 +137,13 @@ FailureOr lowerSetEncodingOpToPackOp( loc, rewriter.getZeroAttr(resultType.getElementType())); SmallVector sourceDims = tensor::getMixedSizes(rewriter, loc, source); - SmallVector resultDims = tensor::PackOp::getResultShape( + SmallVector resultDims = linalg::PackOp::getResultShape( rewriter, loc, sourceDims, *innerTileSizesOfr, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm); auto emptyOp = rewriter.create(loc, resultDims, resultType.getElementType()); return rewriter - .create(loc, source, emptyOp, encodingInfo.innerDimsPos, + .create(loc, source, emptyOp, encodingInfo.innerDimsPos, *innerTileSizesOfr, paddingValue, encodingInfo.outerDimsPerm) .getResult(); @@ -176,7 +176,7 @@ FailureOr lowerUnsetEncodingToUnpackOp( encodingOp, "failed to generate runtime tile size query"); } return rewriter - .create(loc, packedValue, emptyOp, + .create(loc, packedValue, emptyOp, encodingInfo.innerDimsPos, *innerTileSizesOfr, encodingInfo.outerDimsPerm) .getResult(); @@ -209,7 +209,7 @@ lowerOpWithEncoding(RewriterBase &rewriter, tensor::EmptyOp emptyOp, SmallVector sourceDims = emptyOp.getMixedSizes(); (void)foldDynamicIndexList(sourceDims); - SmallVector newShape = tensor::PackOp::getResultShape( + SmallVector newShape = 
linalg::PackOp::getResultShape( rewriter, loc, sourceDims, *innerTileSizesOfr, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm); newShape = getSwizzledShape(newShape, encodingInfo); @@ -380,7 +380,7 @@ static FailureOr> getPackedDimsForDispatchTensor( return failure(); } SmallVector convertedTargetShape = - tensor::PackOp::getResultShape(builder, loc, targetShape, *innerTileSizes, + linalg::PackOp::getResultShape(builder, loc, targetShape, *innerTileSizes, encodingInfo.innerDimsPos, encodingInfo.outerDimsPerm); return getSwizzledShape(convertedTargetShape, encodingInfo); diff --git a/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp b/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp index 875466db5aba..c8d211b29403 100644 --- a/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/TileInferenceUtils.cpp @@ -38,12 +38,12 @@ inferWorkgroupTileMultiplesFromPackUnPack( std::optional> initialPackedMultiples = std::nullopt, std::optional> initialUnPackedMultiples = std::nullopt) { - static_assert(llvm::is_one_of::value); + static_assert(llvm::is_one_of::value); LDBG("Inferring workgroup tile size multiples from " << op->getName() << ":\n" << op); // Initialize the list of multiples for the packed and unpack inputs. - int64_t unPackedRank = (std::is_same::value) + int64_t unPackedRank = (std::is_same::value) ? op.getSourceRank() : op.getDestRank(); SmallVector innerTiles = op.getStaticTiles(); @@ -103,11 +103,11 @@ inferWorkgroupTileMultiplesFromPackUnPack( } SmallVector srcMultiples = - (std::is_same::value) + (std::is_same::value) ? unPackedMultiples : packedMultiples; SmallVector destMultiples = - (std::is_same::value) + (std::is_same::value) ? packedMultiples : unPackedMultiples; LLVM_DEBUG({ @@ -267,14 +267,14 @@ static SmallVector inferResultWorkgroupTileMultiples(OpResult result) { }); return resultMultiples; }) - .Case([&](tensor::PackOp packOp) { + .Case([&](linalg::PackOp packOp) { SmallVector srcMultiples = getOperandMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( packOp, /*initialPackedMultiples=*/std::nullopt, /*initialUnPackedMultiples=*/srcMultiples) .second; }) - .Case([&](tensor::UnPackOp unPackOp) { + .Case([&](linalg::UnPackOp unPackOp) { SmallVector srcMultiples = getOperandMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( unPackOp, /*initialPackedMultiples=*/srcMultiples, @@ -340,14 +340,14 @@ static SmallVector inferUseWorkgroupTileMultiples(OpOperand *use) { }); return srcMultiples; }) - .Case([&](tensor::PackOp packOp) { + .Case([&](linalg::PackOp packOp) { SmallVector destMultiples = getResultMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( packOp, /*initialPackedMultiples=*/destMultiples, /*initialUnPackedMultiples=*/std::nullopt) .first; }) - .Case([&](tensor::UnPackOp unpackOp) { + .Case([&](linalg::UnPackOp unpackOp) { SmallVector destMultiples = getResultMultiples()[0]; return inferWorkgroupTileMultiplesFromPackUnPack( unpackOp, /*initialPackedMultiples=*/std::nullopt, diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir index d4c2018ec24c..791e0fefed5c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/convert_to_destination_passing_style.mlir @@ -795,7 +795,7 @@ func.func @pack() { 
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = tensor.empty() : tensor<2x2x2x2xi32> - %pack = tensor.pack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<4x4xi32> -> tensor<2x2x2x2xi32> + %pack = linalg.pack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<4x4xi32> -> tensor<2x2x2x2xi32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : tensor<2x2x2x2xi32> -> !flow.dispatch.tensor> return } @@ -804,7 +804,7 @@ func.func @pack() { // CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]] -// CHECK: tensor.pack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] +// CHECK: linalg.pack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- @@ -818,7 +818,7 @@ func.func @unpack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x2x2xi32> %3 = tensor.empty() : tensor<4x4xi32> - %4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> + %4 = linalg.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %3 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : tensor<4x4xi32> -> !flow.dispatch.tensor> return } @@ -827,7 +827,7 @@ func.func @unpack() { // CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK-DAG: %[[IN:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[OUT:.+]] = flow.dispatch.tensor.load %[[OUT_BINDING]] -// CHECK: tensor.unpack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] +// CHECK: linalg.unpack %[[IN]] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %[[OUT]] // ----- @@ -864,7 +864,7 @@ func.func @non_perfect_tiling_unpack() { %16 = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%arg1)[%0#1] %17 = flow.dispatch.tensor.load %3, offsets = [%15, %16, 0, 0], sizes = [%c1, %c1, %0#0, %0#1], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%1, %2, %0#0, %0#1} -> tensor %18 = tensor.empty(%0#0, %0#1) : tensor - %19 = tensor.unpack %17 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %18 : tensor -> tensor + %19 = linalg.unpack %17 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %18 : tensor -> tensor %extracted_slice = tensor.extract_slice %19[%13, %14] [1, 1] [1, 1] : tensor to tensor<1x1xi32> %cast = tensor.cast %extracted_slice : tensor<1x1xi32> to tensor flow.dispatch.tensor.store %cast, %4, offsets = [%arg0, %arg1], sizes = [%c1, %c1], strides = [1, 1] : tensor -> !flow.dispatch.tensor> @@ -874,7 +874,7 @@ func.func @non_perfect_tiling_unpack() { } // CHECK-LABEL: func.func @non_perfect_tiling_unpack // CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK-SAME: into %[[ALLOC]] // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[UNPACK]] 
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir index 6be2a7d96920..ea515a880caf 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir @@ -10,13 +10,13 @@ func.func @pack_at_source() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> %barrier = util.optimization_barrier %pack : tensor<4x4x4x4xf32> flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @pack_at_source -// CHECK-NOT: tensor.pack +// CHECK-NOT: linalg.pack // ----- @@ -30,13 +30,13 @@ func.func @unpack_at_source() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> %barrier = util.optimization_barrier %unpack : tensor<16x16xf32> flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @unpack_at_source -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -51,12 +51,12 @@ func.func @pack_at_dest() { %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %barrier = util.optimization_barrier %src : tensor<16x16xf32> - %pack = tensor.pack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @pack_at_dest -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -71,12 +71,12 @@ func.func @unpack_at_dest() { %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 
4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %barrier = util.optimization_barrier %src : tensor<4x4x4x4xf32> - %unpack = tensor.unpack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @unpack_at_dest -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // ----- @@ -91,12 +91,12 @@ func.func @padded_pack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [15, 15], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x15xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> - %pack = tensor.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<15x15xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<15x15xf32> -> tensor<4x4x4x4xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @padded_pack -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -110,12 +110,12 @@ func.func @padded_unpack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [15, 15], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<15x15xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<15x15xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<15x15xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [15, 15], strides = [1, 1] : tensor<15x15xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @padded_unpack -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -134,12 +134,12 @@ func.func @dynamic_pack() { %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3, %3} %src = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %2], strides = [1, 1] : !flow.dispatch.tensor>{%2, %2} -> tensor %dest = flow.dispatch.tensor.load %5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%3, %3} -> tensor - %pack = tensor.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor + %pack = linalg.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor flow.dispatch.tensor.store %pack, 
%5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : tensor -> !flow.dispatch.tensor>{%3, %3} return } // CHECK-LABEL: func.func @dynamic_pack -// CHECK: tensor.pack +// CHECK: linalg.pack // ----- @@ -157,12 +157,12 @@ func.func @dynamic_unpack() { %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%3, %3} %src = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%2, %2, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%2, %2} -> tensor %dest = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : !flow.dispatch.tensor>{%3, %3} -> tensor - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor -> tensor flow.dispatch.tensor.store %unpack, %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%3, %3} return } // CHECK-LABEL: func.func @dynamic_unpack -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -176,12 +176,12 @@ func.func @load_non_full_slice() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @load_non_full_slice -// CHECK-NOT: tensor.pack +// CHECK-NOT: linalg.pack // ----- @@ -195,12 +195,12 @@ func.func @store_non_full_slice() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @store_non_full_slice -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // ----- @@ -216,13 +216,13 @@ func.func @multi_use_unpack_fold() { %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) : !flow.dispatch.tensor> %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides 
= [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> flow.dispatch.tensor.store %unpack, %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @multi_use_unpack_fold -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.unpack // ----- @@ -239,11 +239,11 @@ func.func @multi_use_unpack_no_fold() { %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<4x4x4x4xf32> %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> %dest2 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<16x16xf32> - %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> + %unpack = linalg.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> %copy = linalg.copy ins(%unpack : tensor<16x16xf32>) outs(%dest2 : tensor<16x16xf32>) -> tensor<16x16xf32> flow.dispatch.tensor.store %copy, %2, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor> return } // CHECK-LABEL: func.func @multi_use_unpack_no_fold -// CHECK: tensor.unpack +// CHECK: linalg.unpack diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir index 9f51fe5ad72a..8703fe66ed79 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_pack_unpack_ops.mlir @@ -2,7 +2,7 @@ // RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-decompose-pack-unpack-ops{use-only-reshapes=true}))" --split-input-file %s | FileCheck %s -check-prefixes=CHECK-ALL,CHECK-RESHAPE func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x1x1x8x32xf32>) -> tensor<1x1x1x1x8x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32> return %0 : tensor<1x1x1x1x8x32xf32> } // CHECK-ALL-LABEL: func.func @simple_KCRS_to_KCRSsr @@ -21,7 +21,7 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x // ----- func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> { - %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32> + %0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> 
tensor<1x1x8x2xf32> return %0 : tensor<1x1x8x2xf32> } // CHECK-ALL-LABEL: func.func @simple_pad_and_pack @@ -41,7 +41,7 @@ func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2x // ----- func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{ - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-ALL-LABEL: func.func @simple_NC_to_CNnc @@ -58,7 +58,7 @@ func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32 // ----- func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8x32xf32>) -> tensor<1x1x4x8x8x32xf32> { - %0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> + %0 = linalg.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32> return %0 : tensor<1x1x4x8x8x32xf32> } // CHECK-ALL: func.func @KCRS_to_KCRSsr @@ -74,7 +74,7 @@ func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8 // ----- func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %arg2: f32) -> tensor<2x8x8x2xf32> { - %0 = tensor.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> + %0 = linalg.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32> return %0 : tensor<2x8x8x2xf32> } // CHECK-ALL: func.func @pad_and_pack @@ -94,7 +94,7 @@ func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %a // ----- func.func @KC_to_CKck(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> { - %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> + %0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32> return %0 : tensor<32x4x32x8xf32> } // CHECK-ALL: func.func @KC_to_CKck @@ -110,7 +110,7 @@ func.func @KC_to_CKck(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) // ----- func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x1x1x8x32xf32> -> tensor<1x1x32x8xf32> return %0 : tensor<1x1x32x8xf32> } // CHECK-ALL-LABEL: func.func @simple_KCRSsr_to_KCRS @@ -132,7 +132,7 @@ func.func @simple_KCRSsr_to_KCRS(%arg0: tensor<1x1x1x1x8x32xf32>, %arg1: tensor< // ----- func.func @simple_unpack_and_extract_slice(%input: tensor<1x1x8x2xf32>, %output: tensor<5x1xf32>) -> tensor<5x1xf32> { - %0 = tensor.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> + %0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<1x1x8x2xf32> -> tensor<5x1xf32> return %0 
: tensor<5x1xf32> } // CHECK-ALL-LABEL: func.func @simple_unpack_and_extract_slice @@ -152,7 +152,7 @@ func.func @simple_unpack_and_extract_slice(%input: tensor<1x1x8x2xf32>, %output: // ----- func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32>) -> tensor<32x8xf32>{ - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<1x1x32x8xf32> -> tensor<32x8xf32> return %0 : tensor<32x8xf32> } // CHECK-ALL-LABEL: func.func @simple_CNnc_to_NC @@ -169,7 +169,7 @@ func.func @simple_CNnc_to_NC(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<32x8xf32 // ----- func.func @KCRSsr_to_KCRS(%arg0: tensor<13x12x4x8x8x32xf32>, %arg1: tensor<13x12x128x64xf32>) -> tensor<13x12x128x64xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<13x12x4x8x8x32xf32> -> tensor<13x12x128x64xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<13x12x4x8x8x32xf32> -> tensor<13x12x128x64xf32> return %0 : tensor<13x12x128x64xf32> } // CHECK-ALL: func.func @KCRSsr_to_KCRS @@ -189,7 +189,7 @@ func.func @KCRSsr_to_KCRS(%arg0: tensor<13x12x4x8x8x32xf32>, %arg1: tensor<13x12 // ----- func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13x15xf32>) -> tensor<13x15xf32> { - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<2x8x8x2xf32> -> tensor<13x15xf32> return %0 : tensor<13x15xf32> } // CHECK-ALL: func.func @unpack_and_extract_slice @@ -210,7 +210,7 @@ func.func @unpack_and_extract_slice(%arg0: tensor<2x8x8x2xf32>, %arg1: tensor<13 // ----- func.func @CKck_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) -> tensor<128x256xf32> { - %0 = tensor.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> + %0 = linalg.unpack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x4x32x8xf32> -> tensor<128x256xf32> return %0 : tensor<128x256xf32> } // CHECK-ALL: func.func @CKck_to_KC @@ -224,7 +224,7 @@ func.func @CKck_to_KC(%arg0: tensor<32x4x32x8xf32>, %arg1: tensor<128x256xf32>) // ----- func.func @pack_matmul_DYN_LHS(%src: tensor, %dest: tensor) -> tensor { - %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %dest : tensor -> tensor + %pack = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %dest : tensor -> tensor return %pack : tensor } // CHECK-ALL-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * 16 - s1)> @@ -248,7 +248,7 @@ func.func @pack_matmul_DYN_LHS(%src: tensor, %dest: tensor, %dest: tensor) -> tensor { - %pack = tensor.pack %src outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %dest : tensor -> tensor + %pack = linalg.pack %src outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %dest : tensor -> tensor return %pack : tensor } // CHECK-ALL-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * 16 - s1)> diff --git a/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir b/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir index 
54a06697aa5b..4cdbcc8eb5fa 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/generic_vectorization.mlir @@ -51,7 +51,7 @@ func.func @single_static_pack_infer_vector_size(%arg0: tensor<101x201xi8>, %arg1 %7 = affine.min #map5(%2, %arg2) %extracted_slice = tensor.extract_slice %arg0[%4, %6] [%5, %7] [1, 1] : tensor<101x201xi8> to tensor %extracted_slice_0 = tensor.extract_slice %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor<13x51x16x2xi8> to tensor - %pack = tensor.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_0 : tensor -> tensor + %pack = linalg.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_0 : tensor -> tensor %inserted_slice = tensor.insert_slice %pack into %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor into tensor<13x51x16x2xi8> scf.yield %inserted_slice : tensor<13x51x16x2xi8> } @@ -59,10 +59,10 @@ func.func @single_static_pack_infer_vector_size(%arg0: tensor<101x201xi8>, %arg1 } return %0 : tensor<13x51x16x2xi8> } -// Direct tensor.pack vectorization is only available with masking. +// Direct linalg.pack vectorization is only available with masking. // TODO: Support non-masking path. // CHECK-LABEL: func.func @single_static_pack_infer_vector_size -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-MASK: #[[$MAP0:.+]] = affine_map<(d0) -> (-d0 + 13, 2)> // CHECK-MASK: #[[$MAP1:.+]] = affine_map<(d0) -> (-d0 + 51, 4)> @@ -117,7 +117,7 @@ func.func @single_dynamic_pack_infer_vector_size(%arg0: tensor, %arg1: t %7 = affine.min #map5(%2, %arg2)[%dim_2] %extracted_slice = tensor.extract_slice %arg0[%4, %6] [%5, %7] [1, 1] : tensor to tensor %extracted_slice_3 = tensor.extract_slice %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor to tensor - %pack = tensor.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_3 : tensor -> tensor + %pack = linalg.pack %extracted_slice padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %extracted_slice_3 : tensor -> tensor %inserted_slice = tensor.insert_slice %pack into %arg5[%arg2, %arg4, 0, 0] [%2, %3, 16, 2] [1, 1, 1, 1] : tensor into tensor scf.yield %inserted_slice : tensor } @@ -125,10 +125,10 @@ func.func @single_dynamic_pack_infer_vector_size(%arg0: tensor, %arg1: t } return %0 : tensor } -// Direct tensor.pack vectorization is only available with masking. +// Direct linalg.pack vectorization is only available with masking. // TODO: Support non-masking path. 
// CHECK-LABEL: func.func @single_dynamic_pack_infer_vector_size -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-MASK: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> // CHECK-MASK: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> @@ -194,7 +194,7 @@ func.func @generic_pack_infer_vector_size(%arg0: tensor) -> tensor linalg.yield %13 : bf16 } -> tensor<2x?x?xbf16> %extracted_slice_1 = tensor.extract_slice %arg6[%arg1, %arg3, %arg5, 0, 0] [2, %6, %7, 16, 2] [1, 1, 1, 1, 1] : tensor<32x?x64x16x2xbf16> to tensor<2x?x?x16x2xbf16> - %pack = tensor.pack %12 padding_value(%cst : bf16) outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %extracted_slice_1 : tensor<2x?x?xbf16> -> tensor<2x?x?x16x2xbf16> + %pack = linalg.pack %12 padding_value(%cst : bf16) outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %extracted_slice_1 : tensor<2x?x?xbf16> -> tensor<2x?x?x16x2xbf16> %inserted_slice = tensor.insert_slice %pack into %arg6[%arg1, %arg3, %arg5, 0, 0] [2, %6, %7, 16, 2] [1, 1, 1, 1, 1] : tensor<2x?x?x16x2xbf16> into tensor<32x?x64x16x2xbf16> scf.yield %inserted_slice : tensor<32x?x64x16x2xbf16> } @@ -270,7 +270,7 @@ func.func @single_dynamic_unpack_infer_vector_size(%arg0: tensor, %6 = affine.apply #map3(%3) %extracted_slice = tensor.extract_slice %arg0[%4, %5, 0, 0] [1, %6, 16, 16] [1, 1, 1, 1] : tensor to tensor<1x?x16x16xf32> %extracted_slice_3 = tensor.extract_slice %arg5[%arg2, %arg4] [%2, %3] [1, 1] : tensor to tensor - %unpack = tensor.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_3 : tensor<1x?x16x16xf32> -> tensor + %unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_3 : tensor<1x?x16x16xf32> -> tensor %inserted_slice = tensor.insert_slice %unpack into %arg5[%arg2, %arg4] [%2, %3] [1, 1] : tensor into tensor scf.yield %inserted_slice : tensor } @@ -324,7 +324,7 @@ func.func @generic_unpack_infer_vector_size(%arg0: tensor, %arg1: %6 = affine.apply #map3(%3) %extracted_slice = tensor.extract_slice %arg0[%4, %5, 0, 0] [1, %6, 16, 16] [1, 1, 1, 1] : tensor to tensor<1x?x16x16xf32> %extracted_slice_1 = tensor.extract_slice %arg1[%arg3, %arg5] [%2, %3] [1, 1] : tensor to tensor - %unpack = tensor.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_1 : tensor<1x?x16x16xf32> -> tensor + %unpack = linalg.unpack %extracted_slice outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %extracted_slice_1 : tensor<1x?x16x16xf32> -> tensor %extracted_slice_2 = tensor.extract_slice %arg6[%arg3, %arg5] [%2, %3] [1, 1] : tensor to tensor %7 = linalg.generic {indexing_maps = [#map4, #map4], iterator_types = ["parallel", "parallel"]} ins(%unpack : tensor) outs(%extracted_slice_2 : tensor) { ^bb0(%in: f32, %out: f32): diff --git a/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir index 405b58f0cf3a..e886f8185775 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir @@ -46,7 +46,7 @@ func.func @set_encoding_LHS_unroll8x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_LHS_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = 
tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 16] @@ -79,7 +79,7 @@ func.func @set_encoding_LHS_narrow_unroll1x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_LHS_narrow_unroll1x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [16, 16] @@ -122,7 +122,7 @@ func.func @set_encoding_LHS_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32() { return } // CHECK-LABEL: func.func @set_encoding_LHS_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 16] @@ -155,7 +155,7 @@ func.func @set_encoding_RHS_unroll8x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_RHS_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [128, 16] @@ -188,7 +188,7 @@ func.func @set_encoding_RHS_narrow_unroll8x1x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_RHS_narrow_unroll8x1x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [16, 16] @@ -221,7 +221,7 @@ func.func @set_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32() { } // CHECK-LABEL: func.func @set_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : f32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : f32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -260,7 +260,7 @@ func.func @unset_encoding_ACC_unroll8x8x4_MFMA_F32_16x16x4_F32() { // CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4] // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]] // CHECK-SAME: : tensor<2x5x4x8x4x4x16x2xf32> into tensor<2x5x128x128xf32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -303,7 +303,7 @@ func.func @unset_encoding_ACC_dynamic_unroll8x8x4_MFMA_F32_16x16x4_F32() { // CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4] // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]] // CHECK-SAME: : tensor into tensor -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -453,7 +453,7 @@ func.func @set_encoding_LHS_unroll8x8x2_MFMA_I32_16x16x32_I8() { } // CHECK-LABEL: 
func.func @set_encoding_LHS_unroll8x8x2_MFMA_I32_16x16x32_I8 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : i8) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : i8) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 64] @@ -486,7 +486,7 @@ func.func @set_encoding_RHS_unroll8x8x2_MFMA_I32_16x16x32_I8() { } // CHECK-LABEL: func.func @set_encoding_RHS_unroll8x8x2_MFMA_I32_16x16x32_I8 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : i8) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : i8) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] // CHECK-SAME: inner_tiles = [128, 64] @@ -519,7 +519,7 @@ func.func @set_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8() { } // CHECK-LABEL: func.func @set_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8 -// CHECK: %[[PACK:.*]] = tensor.pack %{{.+}} padding_value(%{{.+}} : i32) +// CHECK: %[[PACK:.*]] = linalg.pack %{{.+}} padding_value(%{{.+}} : i32) // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] @@ -558,7 +558,7 @@ func.func @unset_encoding_ACC_unroll8x8x2_MFMA_I32_16x16x32_I8() { // CHECK-SAME: permutation = [0, 1, 5, 3, 7, 2, 6, 4] // CHECK: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[TRANSPOSE]] // CHECK-SAME: : tensor<2x5x4x8x4x4x16x2xi32> into tensor<2x5x128x128xi32> -// CHECK: %[[UNPACK:.*]] = tensor.unpack %[[COLLAPSE]] +// CHECK: %[[UNPACK:.*]] = linalg.unpack %[[COLLAPSE]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [128, 128] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir index 4a13f0a08c42..f8c46e65f63b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/iree_comprehensive_bufferize.mlir @@ -2497,7 +2497,7 @@ func.func @tensor_pack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x3x3xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> - %4 = tensor.pack %3 padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : tensor<4x4xi32> -> tensor<2x2x3x3xi32> + %4 = linalg.pack %3 padding_value(%c0_i32 : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %2 : tensor<4x4xi32> -> tensor<2x2x3x3xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0, 0], sizes = [2, 2, 3, 3], strides = [1, 1, 1, 1] : tensor<2x2x3x3xi32> -> !flow.dispatch.tensor> return } @@ -2521,7 +2521,7 @@ func.func @tensor_unpack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, 2, 2], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x2x2xi32> - %4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : tensor<2x2x2x2xi32> -> 
tensor<4x4xi32> + %4 = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %2 : tensor<2x2x2x2xi32> -> tensor<4x4xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : tensor<4x4xi32> -> !flow.dispatch.tensor> return } @@ -2544,7 +2544,7 @@ func.func @tensor_unpack_fully_dynamic() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<4x4xi32> %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [2, 2, %inner_d0, %inner_d0], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<2x2x?x?xi32> - %4 = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : tensor<2x2x?x?xi32> -> tensor<4x4xi32> + %4 = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [%inner_d0, %inner_d0] into %2 : tensor<2x2x?x?xi32> -> tensor<4x4xi32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [4, 4], strides = [1, 1] : tensor<4x4xi32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir index c18018465977..01b2c7b83b19 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir @@ -17,7 +17,7 @@ func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes return } // This tests that -// 1. The padding value is created for tensor.pack ops. +// 1. The padding value is created for linalg.pack ops. // 2. The inner tile sizes are less than or equal to values in round_dims_to. // We could choose 128 when it is a narrow matrix. 
// CHECK-LABEL: func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f @@ -26,7 +26,7 @@ func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes // CHECK-DAG: %[[OUT_BINDING:.+]] = hal.interface.binding.subspan {{.+}} : !flow.dispatch.tensor> // CHECK: %[[SRC:.+]] = flow.dispatch.tensor.load %[[IN_BINDING]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor<1x1000x1x1xbf16> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [1, 1] @@ -60,7 +60,7 @@ func.func @set_encoding_7x7x7_matmul_LHS() attributes { // CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0], sizes = [7, 7], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<7x7xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x7x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] padding_value(%[[CST]] : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<7x7xf32> -> tensor<1x7x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] padding_value(%[[CST]] : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<7x7xf32> -> tensor<1x7x8x1xf32> // CHECK: flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]], offsets = [0, 0, 0, 0], sizes = [1, 7, 8, 1], strides = [1, 1, 1, 1] : tensor<1x7x8x1xf32> -> !flow.dispatch.tensor> // ----- @@ -91,7 +91,7 @@ func.func @set_encoding_128x80x32_batch_matmul_LHS() attributes { // CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 80, 32], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x80x32xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x10x32x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x80x32xf32> -> tensor<128x10x32x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x80x32xf32> -> tensor<128x10x32x8x1xf32> // CHECK: flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]], offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 32, 8, 1], strides = [1, 1, 1, 1, 1] : tensor<128x10x32x8x1xf32> -> !flow.dispatch.tensor> // ----- @@ -124,7 +124,7 @@ func.func @set_encoding_128x32x320_batch_matmul_RHS() attributes { // CHECK: %[[OUTPUT_BINDING:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) {{.*}} !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]], offsets = [0, 0, 0], sizes = [128, 32, 320], strides = [1, 1, 1] : !flow.dispatch.tensor> -> tensor<128x32x320xf32> // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x40x32x8x1xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x32x320xf32> -> tensor<128x40x32x8x1xf32> +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<128x32x320xf32> -> tensor<128x40x32x8x1xf32> // CHECK: 
flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]], offsets = [0, 0, 0, 0, 0], sizes = [128, 40, 32, 8, 1], strides = [1, 1, 1, 1, 1] : tensor<128x40x32x8x1xf32> -> !flow.dispatch.tensor> // ----- @@ -163,7 +163,7 @@ func.func @unset_encoding_128x80x320_batch_matmul_RESULT() attributes { // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0, 0], sizes = [128, 10, 40, 8, 8], strides = [1, 1, 1, 1, 1] // CHECK: %[[EMPTY:.+]] = tensor.empty() -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[INPUT]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[INPUT]] // CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] // CHECK-DAG: flow.dispatch.tensor.store %[[UNPACK]], %[[OUTPUT_BINDING]] @@ -203,8 +203,8 @@ func.func @pack_gemm_fill_dynamic(%arg0 : tensor, %arg1 : tensor // CHECK: %[[FILL:.+]] = linalg.fill @@ -212,7 +212,7 @@ func.func @pack_gemm_fill_dynamic(%arg0 : tensor, %arg1 : tensor, %ar // CHECK-SAME: %[[LHS:.+]]: tensor<32x1x128xi8>, %[[RHS:.+]]: tensor<32x128x11008xi8>) -> tensor<32x1x11008xi32> // CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<32x1x64x1x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 2] into %[[INIT_LHS_PACK]] : tensor<32x1x128xi8> -> tensor<32x1x64x1x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 2] into %[[INIT_LHS_PACK]] : tensor<32x1x128xi8> -> tensor<32x1x64x1x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<32x1x64x1x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<32x1x64x1x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<32x1x64x1x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<32x688x64x16x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<32x688x64x16x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<32x688x64x16x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<32x688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2105,7 +2105,7 @@ func.func @extend_batch_vecmat_explicit_unit_dim(%arg0: tensor<32x1x128xi8>, %ar // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[C0_I32]] : i32) outs(%[[INIT_FILL]] : tensor<32x1x688x1x16xi32>) -> tensor<32x1x688x1x16xi32> // CHECK: %[[MMT4D:.+]] = linalg.batch_mmt4d ins(%[[LHS_EXT]], %[[RHS_EXT]] : tensor<32x1x64x1x2xi32>, tensor<32x688x64x16x2xi32>) outs(%[[FILL]] : tensor<32x1x688x1x16xi32>) -> tensor<32x1x688x1x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = 
tensor.empty() : tensor<32x1x11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 16] into %[[INIT_UNPACK]] : tensor<32x1x688x1x16xi32> -> tensor<32x1x11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [1, 16] into %[[INIT_UNPACK]] : tensor<32x1x688x1x16xi32> -> tensor<32x1x11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2293,14 +2293,14 @@ func.func @vecmat(%arg0: tensor<128xi8>, %arg1: tensor<128x11008xi8>) -> tensor< // CHECK-SAME: %[[LHS:.+]]: tensor<128xi8>, %[[RHS:.+]]: tensor<128x11008xi8>) -> tensor<11008xi32> // CHECK-DAG: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<64x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<64x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<64x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<64x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<688x64x16x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<128x11008xi8> -> tensor<688x64x16x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<128x11008xi8> -> tensor<688x64x16x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<688x64x16x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<688x64x16x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2313,7 +2313,7 @@ func.func @vecmat(%arg0: tensor<128xi8>, %arg1: tensor<128x11008xi8>) -> tensor< // CHECK: %[[MMT4D:.+]] = linalg.mmt4d ins(%[[EXPAND_LHS]], %[[RHS_EXT]] : tensor<1x64x1x2xi32>, tensor<688x64x16x2xi32>) outs(%[[FILL]] : tensor<1x688x1x16xi32>) -> tensor<1x688x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0, 1], [2, 3]] : tensor<1x688x1x16xi32> into tensor<688x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2355,14 +2355,14 @@ func.func @matvec(%arg0: tensor<11008x128xi8>, %arg1: tensor<128xi8>) -> tensor< // CHECK-SAME: %[[LHS:.+]]: 
tensor<11008x128xi8>, %[[RHS:.+]]: tensor<128xi8>) -> tensor<11008xi32> // CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<688x64x16x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<11008x128xi8> -> tensor<688x64x16x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<11008x128xi8> -> tensor<688x64x16x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<688x64x16x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<688x64x16x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<64x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<64x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<64x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<64x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2375,7 +2375,7 @@ func.func @matvec(%arg0: tensor<11008x128xi8>, %arg1: tensor<128xi8>) -> tensor< // CHECK: %[[MMT4D:.+]] = linalg.mmt4d ins(%[[EXPAND_RHS]], %[[LHS_EXT]] : tensor<1x64x1x2xi32>, tensor<688x64x16x2xi32>) outs(%[[FILL]] : tensor<1x688x1x16xi32>) -> tensor<1x688x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0, 1], [2, 3]] : tensor<1x688x1x16xi32> into tensor<688x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<688x16xi32> -> tensor<11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2418,14 +2418,14 @@ func.func @matvec_with_narrow_M(%arg0: tensor<15x128xi8>, %arg1: tensor<128xi8>) // CHECK-DAG: %[[C0_I8:.+]] = arith.constant 0 : i8 // CHECK-DAG: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<1x64x16x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] padding_value(%[[C0_I8]] : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<15x128xi8> -> tensor<1x64x16x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] padding_value(%[[C0_I8]] : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %[[INIT_LHS_PACK]] : tensor<15x128xi8> -> tensor<1x64x16x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() 
: tensor<1x64x16x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<1x64x16x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<1x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<64x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [2] into %[[INIT_RHS_PACK]] : tensor<128xi8> -> tensor<64x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<64x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<64x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<64x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2438,7 +2438,7 @@ func.func @matvec_with_narrow_M(%arg0: tensor<15x128xi8>, %arg1: tensor<128xi8>) // CHECK: %[[MMT4D:.+]] = linalg.mmt4d ins(%[[EXPAND_RHS]], %[[LHS_EXT]] : tensor<1x64x1x2xi32>, tensor<1x64x16x2xi32>) outs(%[[FILL]] : tensor<1x1x1x16xi32>) -> tensor<1x1x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0, 1], [2, 3]] : tensor<1x1x1x16xi32> into tensor<1x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<15xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<1x16xi32> -> tensor<15xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0] inner_dims_pos = [0] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<1x16xi32> -> tensor<15xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2480,14 +2480,14 @@ func.func @batch_vecmat(%arg0: tensor<32x128xi8>, %arg1: tensor<32x128x11008xi8> // CHECK-SAME: %[[LHS:.+]]: tensor<32x128xi8>, %[[RHS:.+]]: tensor<32x128x11008xi8>) -> tensor<32x11008xi32> // CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32 // CHECK: %[[INIT_LHS_PACK:.+]] = tensor.empty() : tensor<32x64x2xi8> -// CHECK: %[[LHS_PACK:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<32x128xi8> -> tensor<32x64x2xi8> +// CHECK: %[[LHS_PACK:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [2] into %[[INIT_LHS_PACK]] : tensor<32x128xi8> -> tensor<32x64x2xi8> // CHECK: %[[INIT_LHS_EXT:.+]] = tensor.empty() : tensor<32x64x2xi32> // CHECK: %[[LHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP]]], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[LHS_PACK]] : tensor<32x64x2xi8>) outs(%[[INIT_LHS_EXT]] : tensor<32x64x2xi32>) { // CHECK-NEXT: ^bb0(%[[LHS_EXT_ARG_IN:.+]]: i8, %[[LHS_EXT_ARG_OUT:.+]]: i32): // CHECK-NEXT: %[[LHS_EXT_OP:.+]] = arith.extsi %[[LHS_EXT_ARG_IN]] : i8 to i32 // CHECK-NEXT: linalg.yield %[[LHS_EXT_OP]] : i32 // CHECK: %[[INIT_RHS_PACK:.+]] = tensor.empty() : tensor<32x688x64x16x2xi8> -// CHECK: %[[RHS_PACK:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : 
tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> +// CHECK: %[[RHS_PACK:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %[[INIT_RHS_PACK]] : tensor<32x128x11008xi8> -> tensor<32x688x64x16x2xi8> // CHECK: %[[INIT_RHS_EXT:.+]] = tensor.empty() : tensor<32x688x64x16x2xi32> // CHECK: %[[RHS_EXT:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%[[RHS_PACK]] : tensor<32x688x64x16x2xi8>) outs(%[[INIT_RHS_EXT]] : tensor<32x688x64x16x2xi32>) { // CHECK-NEXT: ^bb0(%[[RHS_EXT_ARG_IN:.+]]: i8, %[[RHS_EXT_ARG_OUT:.+]]: i32): @@ -2500,7 +2500,7 @@ func.func @batch_vecmat(%arg0: tensor<32x128xi8>, %arg1: tensor<32x128x11008xi8> // CHECK: %[[MMT4D:.+]] = linalg.batch_mmt4d ins(%[[EXPAND_LHS]], %[[RHS_EXT]] : tensor<32x1x64x1x2xi32>, tensor<32x688x64x16x2xi32>) outs(%[[FILL]] : tensor<32x1x688x1x16xi32>) -> tensor<32x1x688x1x16xi32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[MMT4D]] {{\[}}[0], [1, 2], [3, 4]] : tensor<32x1x688x1x16xi32> into tensor<32x688x16xi32> // CHECK: %[[INIT_UNPACK:.+]] = tensor.empty() : tensor<32x11008xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSED]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<32x688x16xi32> -> tensor<32x11008xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSED]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [16] into %[[INIT_UNPACK]] : tensor<32x688x16xi32> -> tensor<32x11008xi32> // CHECK: return %[[UNPACK]] // ----- @@ -2552,16 +2552,16 @@ func.func @matmul_transpose_a_f32f32f32(%arg0: tensor<256x128xf32>, %arg1: tenso // CHECK-LABEL: func.func @matmul_transpose_a_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<256x128xf32>, %[[RHS:.+]]: tensor<256x512xf32>, %[[RESULT:.+]]: tensor<128x512xf32>) -> tensor<128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<256x128xf32> -> tensor<16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<256x128xf32> -> tensor<16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<128x256x4x1xf32> -// CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<256x512xf32> -> tensor<128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<256x512xf32> -> tensor<128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> // CHECK: %[[MMT4D:.+]] = linalg.mmt4d // CHECK-SAME: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK-SAME: outs(%[[PACK_RES]] : // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<128x512xf32> -// CHECK: %[[UNPACK:.+]] = 
tensor.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> // CHECK: return %[[UNPACK]] // ----- @@ -2591,16 +2591,16 @@ func.func @matmul_transpose_b_f32f32f32(%arg0: tensor<128x256xf32>, %arg1: tenso // CHECK-LABEL: func.func @matmul_transpose_b_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<128x256xf32>, %[[RHS:.+]]: tensor<512x256xf32>, %[[RESULT:.+]]: tensor<128x512xf32>) -> tensor<128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<128x256xf32> -> tensor<16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<128x256xf32> -> tensor<16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<128x256x4x1xf32> -// CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<512x256xf32> -> tensor<128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<512x256xf32> -> tensor<128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<128x512xf32> -> tensor<16x128x8x4xf32> // CHECK: %[[MMT4D:.+]] = linalg.mmt4d // CHECK-SAME: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK-SAME: outs(%[[PACK_RES]] : // CHECK: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<128x512xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[MMT4D]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<16x128x8x4xf32> -> tensor<128x512xf32> // ----- @@ -2630,16 +2630,16 @@ func.func @batch_matmul_transpose_a_f32f32f32(%arg0: tensor<2x256x128xf32>, %arg // CHECK-LABEL: func.func @batch_matmul_transpose_a_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<2x256x128xf32>, %[[RHS:.+]]: tensor<2x256x512xf32>, %[[RESULT:.+]]: tensor<2x128x512xf32>) -> tensor<2x128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<2x16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x256x128xf32> -> tensor<2x16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x256x128xf32> -> tensor<2x16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<2x128x256x4x1xf32> -// 
CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x256x512xf32> -> tensor<2x128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x256x512xf32> -> tensor<2x128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<2x16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> // CHECK: %[[BATCH_MMT4D:.+]] = linalg.batch_mmt4d // CHECK: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK: outs(%[[PACK_RES]] : // CHECK: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<2x128x512xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> // ----- @@ -2669,16 +2669,16 @@ func.func @batch_matmul_transpose_b_f32f32f32(%arg0: tensor<2x128x256xf32>, %arg // CHECK-LABEL: func.func @batch_matmul_transpose_b_f32f32f32( // CHECK-SAME: %[[LHS:.+]]: tensor<2x128x256xf32>, %[[RHS:.+]]: tensor<2x512x256xf32>, %[[RESULT:.+]]: tensor<2x128x512xf32>) -> tensor<2x128x512xf32> // CHECK: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<2x16x256x8x1xf32> -// CHECK: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x128x256xf32> -> tensor<2x16x256x8x1xf32> +// CHECK: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[PACK_LHS_DEST]] : tensor<2x128x256xf32> -> tensor<2x16x256x8x1xf32> // CHECK: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<2x128x256x4x1xf32> -// CHECK: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x512x256xf32> -> tensor<2x128x256x4x1xf32> +// CHECK: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [4, 1] into %[[PACK_RHS_DEST]] : tensor<2x512x256xf32> -> tensor<2x128x256x4x1xf32> // CHECK: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<2x16x128x8x4xf32> -// CHECK: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> +// CHECK: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[PACK_RES_DEST]] : tensor<2x128x512xf32> -> tensor<2x16x128x8x4xf32> // CHECK: %[[BATCH_MMT4D:.+]] = linalg.batch_mmt4d // CHECK-SAME: ins(%[[PACK_LHS]], %[[PACK_RHS]] : // CHECK-SAME: outs(%[[PACK_RES]] : // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<2x128x512xf32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] 
inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[BATCH_MMT4D]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %[[UNPACK_DEST]] : tensor<2x16x128x8x4xf32> -> tensor<2x128x512xf32> // ----- @@ -2717,12 +2717,12 @@ func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0: tensor<32x128xi16>, % // CHECK-LABEL: func.func @generic_batch_vecmat_transposed_i16u4i32( // CHECK-SAME: %[[LHS:.+]]: tensor<32x128xi16>, %[[RHS:.+]]: tensor<4096x32x128xi4>, %[[RESULT:.+]]: tensor<4096x32xi32>) -> tensor<4096x32xi32> // CHECK-DAG: %[[PACK_LHS_DEST:.+]] = tensor.empty() : tensor<32x16x8xi16> -// CHECK-DAG: %[[PACK_LHS:.+]] = tensor.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %[[PACK_LHS_DEST]] : tensor<32x128xi16> -> tensor<32x16x8xi16> +// CHECK-DAG: %[[PACK_LHS:.+]] = linalg.pack %[[LHS]] outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %[[PACK_LHS_DEST]] : tensor<32x128xi16> -> tensor<32x16x8xi16> // CHECK-DAG: %[[EXPAND_LHS:.+]] = tensor.expand_shape %[[PACK_LHS]] {{.*}} output_shape [32, 1, 16, 1, 8] : tensor<32x16x8xi16> into tensor<32x1x16x1x8xi16> // CHECK-DAG: %[[PACK_RHS_DEST:.+]] = tensor.empty() : tensor<32x128x16x32x8xi4> -// CHECK-DAG: %[[PACK_RHS:.+]] = tensor.pack %[[RHS]] outer_dims_perm = [1, 0, 2] inner_dims_pos = [0, 2] inner_tiles = [32, 8] into %[[PACK_RHS_DEST]] : tensor<4096x32x128xi4> -> tensor<32x128x16x32x8xi4> +// CHECK-DAG: %[[PACK_RHS:.+]] = linalg.pack %[[RHS]] outer_dims_perm = [1, 0, 2] inner_dims_pos = [0, 2] inner_tiles = [32, 8] into %[[PACK_RHS_DEST]] : tensor<4096x32x128xi4> -> tensor<32x128x16x32x8xi4> // CHECK-DAG: %[[PACK_RES_DEST:.+]] = tensor.empty() : tensor<32x128x32xi32> -// CHECK-DAG: %[[PACK_RES:.+]] = tensor.pack %[[RESULT]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[PACK_RES_DEST]] : tensor<4096x32xi32> -> tensor<32x128x32xi32> +// CHECK-DAG: %[[PACK_RES:.+]] = linalg.pack %[[RESULT]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[PACK_RES_DEST]] : tensor<4096x32xi32> -> tensor<32x128x32xi32> // CHECK-DAG: %[[EXTEND_DEST:.+]] = tensor.empty() : tensor<32x128x16x32x8xi32> // CHECK: %[[EXTEND:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] @@ -2734,7 +2734,7 @@ func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0: tensor<32x128xi16>, % // CHECK-SAME: outs(%[[EXPAND_RES]] : // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %[[BATCH_MMT4D]] {{.*}} : tensor<32x1x128x1x32xi32> into tensor<32x128x32xi32> // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty() : tensor<4096x32xi32> -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[COLLAPSE]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[UNPACK_DEST]] : tensor<32x128x32xi32> -> tensor<4096x32xi32> +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[COLLAPSE]] outer_dims_perm = [1, 0] inner_dims_pos = [0] inner_tiles = [32] into %[[UNPACK_DEST]] : tensor<32x128x32xi32> -> tensor<4096x32xi32> // ----- @@ -2980,7 +2980,7 @@ func.func @set_encoding_LHS_with_layout() attributes { // CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(0) {{.*}} : !flow.dispatch.tensor> // CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(1) {{.*}} : !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] -// CHECK: %[[PACK:.+]] = tensor.pack 
%[[INPUT]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [1, 1] @@ -3012,7 +3012,7 @@ func.func @set_encoding_RHS_with_layout() attributes { // CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(1) {{.*}} : !flow.dispatch.tensor> // CHECK-DAG: %[[PAD_VALUE:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] -// CHECK: %[[PACK:.+]] = tensor.pack %[[INPUT]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[INPUT]] // CHECK-SAME: padding_value(%[[PAD_VALUE]] : f32) // CHECK-SAME: outer_dims_perm = [1, 0] // CHECK-SAME: inner_dims_pos = [1, 0] @@ -3044,7 +3044,7 @@ func.func @unset_encoding_RES_with_layout() attributes { // CHECK-DAG: %[[INPUT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(0) {{.*}} : !flow.dispatch.tensor> // CHECK-DAG: %[[RESULT_BINDING:.+]] = hal.interface.binding.subspan {{.*}} binding(1) {{.*}} : !flow.dispatch.tensor> // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[INPUT]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[INPUT]] // CHECK-SAME: outer_dims_perm = [0, 1] // CHECK-SAME: inner_dims_pos = [0, 1] // CHECK-SAME: inner_tiles = [1, 16] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir index 03d1e51f9398..6ed536d29c15 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/tile_and_distribute_to_workgroups.mlir @@ -1918,7 +1918,7 @@ hal.executable private @pack_lowering { %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [100, 250], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<100x250xf32> %3 = tensor.empty() : tensor<14x64x8x4xf32> - %4 = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %3 + %4 = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<100x250xf32> -> tensor<14x64x8x4xf32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0, 0], sizes = [14, 64, 8, 4], strides = [1, 1, 1, 1] @@ -1960,7 +1960,7 @@ hal.executable private @pack_lowering { %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<250x500xf32> %3 = tensor.empty() : tensor<64x64x8x4xf32> - %4 = tensor.pack %2 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 4] into %3 + %4 = linalg.pack %2 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 4] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<250x500xf32> -> tensor<64x64x8x4xf32> flow.dispatch.tensor.store %4, %1, offsets = [0, 0, 0, 0], sizes = [64, 64, 8, 4], strides = [1, 1, 1, 1] @@ -2018,7 +2018,7 @@ hal.executable private @clone_index_computations { %15 = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%13] %16 = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%14] %17 = tensor.empty(%15, %16) : tensor - %18 = tensor.pack %12 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %17 + %18 = linalg.pack %12 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %17 
{lowering_config = #iree_codegen.lowering_config} : tensor -> tensor %19 = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%6] @@ -2075,7 +2075,7 @@ hal.executable private @dynamic_unpack { %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 + %12 = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor flow.dispatch.tensor.store %12, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} @@ -2087,7 +2087,7 @@ hal.executable private @dynamic_unpack { // CHECK-LABEL: func.func @dynamic_unpack // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -2128,7 +2128,7 @@ hal.executable private @dynamic_unpack_dynamic_tile { %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, %c32, %c16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5, %c32, %c16} -> tensor %11 = tensor.empty(%6, %7) : tensor - %12 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%tile0, %tile1] into %11 + %12 = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%tile0, %tile1] into %11 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor flow.dispatch.tensor.store %12, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} @@ -2140,7 +2140,7 @@ hal.executable private @dynamic_unpack_dynamic_tile { // CHECK-LABEL: func.func @dynamic_unpack_dynamic_tile // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // ----- @@ -2162,7 +2162,7 @@ hal.executable private @unpack_elem { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [16, 48, 8, 8], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<16x48x8x8xf32> %3 = tensor.empty() : tensor<128x384xf32> - %4 = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<16x48x8x8xf32> -> tensor<128x384xf32> + %4 = linalg.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %3 {lowering_config = #iree_codegen.lowering_config} : tensor<16x48x8x8xf32> -> tensor<128x384xf32> %5 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%4 : tensor<128x384xf32>) outs(%3 : tensor<128x384xf32>) { ^bb0(%in: f32, %out: f32): %6 = arith.addf %in, %in : f32 @@ -2178,7 +2178,7 @@ hal.executable private @unpack_elem { // CHECK-LABEL: func.func @unpack_elem // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.generic // ----- @@ -2221,7 +2221,7 @@ hal.executable private @dynamic_unpack_fusion { %12 = flow.dispatch.tensor.load %5, offsets = [0], sizes = [16], strides = [1] : !flow.dispatch.tensor> -> tensor<16xi32> %13 = tensor.empty() : 
tensor<12544x16xi32> %14 = tensor.empty() : tensor<12544x16xi32> - %16 = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %14 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor<12544x16xi32> + %16 = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [%0#0, %0#1] into %14 {lowering_config = #iree_codegen.lowering_config} : tensor -> tensor<12544x16xi32> %17 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst, %16, %11, %12 : tensor<16xi32>, tensor<12544x16xi32>, tensor<12544xi32>, tensor<16xi32>) outs(%13 : tensor<12544x16xi32>) { ^bb0(%in: i32, %in_0: i32, %in_1: i32, %in_2: i32, %out: i32): %18 = arith.muli %in_1, %c-30_i32 : i32 @@ -2240,7 +2240,7 @@ hal.executable private @dynamic_unpack_fusion { } // CHECK-LABEL: func.func @dynamic_unpack_fusion // CHECK: scf.for -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: tensor.extract_slice // CHECK: linalg.generic @@ -2300,7 +2300,7 @@ hal.executable private @elem_pack { linalg.yield %23, %25 : f32, f32 } -> (tensor<384x512xf32>, tensor<384x512xf32>) %17 = tensor.empty() : tensor<48x512x8x1xf32> - %18 = tensor.pack %16#0 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %17 {lowering_config = #iree_codegen.lowering_config} : tensor<384x512xf32> -> tensor<48x512x8x1xf32> + %18 = linalg.pack %16#0 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %17 {lowering_config = #iree_codegen.lowering_config} : tensor<384x512xf32> -> tensor<48x512x8x1xf32> flow.dispatch.tensor.store %18, %6, offsets = [0, 0, 0, 0], sizes = [48, 512, 8, 1], strides = [1, 1, 1, 1] : tensor<48x512x8x1xf32> -> !flow.dispatch.tensor> flow.dispatch.tensor.store %16#0, %7, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : tensor<384x512xf32> -> !flow.dispatch.tensor> return @@ -2312,7 +2312,7 @@ hal.executable private @elem_pack { // CHECK: scf.for // CHECK: scf.for // CHECK: %[[ELEM:.+]]:2 = linalg.generic -// CHECK: %[[PACK:.+]] = tensor.pack +// CHECK: %[[PACK:.+]] = linalg.pack // CHECK-DAG: flow.dispatch.tensor.store %[[PACK]], {{.*}} sizes = [8, 64, 8, 1] // CHECK-DAG: flow.dispatch.tensor.store %[[ELEM]]#0, {{.*}} sizes = [64, 64] diff --git a/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir index 2f3b91ff7255..0602dc6ae32a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir @@ -178,7 +178,7 @@ func.func @set_encoding_dynamic() attributes { // CHECK-SAME: !flow.dispatch.tensor>{%[[TILED_D0]], %[[TILED_D1]]} // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK: %[[EMPTY:.+]] = tensor.empty -// CHECK: %[[PACK:.+]] = tensor.pack +// CHECK: %[[PACK:.+]] = linalg.pack // CHECK-SAME: %[[INPUT]] padding_value(%[[CST]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[EMPTY]] // CHECK: flow.dispatch.tensor.store %[[PACK]], %[[OUTPUT_BINDING]] @@ -229,7 +229,7 @@ func.func @unset_encoding_dynamic() attributes { // CHECK: %[[INPUT:.+]] = flow.dispatch.tensor.load %[[INPUT_BINDING]] // CHECK-SAME: offsets = [0, 0, 0, 0], sizes = [%[[TILED_D0]], %[[TILED_D1]], 8, 4], strides = [1, 1, 1, 1] // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[D0]], %[[D1]]) -// 
CHECK: %[[UNPACK:.+]] = tensor.unpack %[[INPUT]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[INPUT]] // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %[[EMPTY]] // CHECK-DAG: flow.dispatch.tensor.store %[[UNPACK]], %[[OUTPUT_BINDING]] diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h index 13a393d33914..55945f2329ca 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h +++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h @@ -84,7 +84,7 @@ struct TileSwizzle { /// Container of information needed to materialize the layout transformations. struct MaterializeEncodingInfo { - // The next 3 fields are used to create a `tensor.pack` or `tensor.unpack` op, + // The next 3 fields are used to create a `linalg.pack` or `linalg.unpack` op, // changing the overall layout between row-major and tiled (where each tile is // row-major). SmallVector innerDimsPos; diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp index 3dc8e36d3b4b..6fbc9c45ff8a 100644 --- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp +++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp @@ -364,7 +364,7 @@ template static FailureOr> getSourceAndDestFromPackUnPackOp(RewriterBase &rewriter, OpTy op, const BufferizationOptions &options) { - static_assert(llvm::is_one_of::value); + static_assert(llvm::is_one_of::value); Value source; auto maybeBuffer = getBuffer(rewriter, op.getSource(), options); if (failed(maybeBuffer)) @@ -385,7 +385,7 @@ getSourceAndDestFromPackUnPackOp(RewriterBase &rewriter, OpTy op, return std::make_pair(source, dest); } -static LogicalResult bufferizePackOp(RewriterBase &rewriter, tensor::PackOp op, +static LogicalResult bufferizePackOp(RewriterBase &rewriter, linalg::PackOp op, const BufferizationOptions &options) { // Take a guard before anything else. OpBuilder::InsertionGuard g(rewriter); @@ -410,7 +410,7 @@ static LogicalResult bufferizePackOp(RewriterBase &rewriter, tensor::PackOp op, } static LogicalResult bufferizeUnPackOp(RewriterBase &rewriter, - tensor::UnPackOp op, + linalg::UnPackOp op, const BufferizationOptions &options) { // Take a guard before anything else. 
OpBuilder::InsertionGuard g(rewriter); @@ -489,9 +489,9 @@ struct PackUnPackOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const BufferizationOptions &options) const { return TypeSwitch(op) - .template Case( + .template Case( [&](auto pack) { return bufferizePackOp(rewriter, pack, options); }) - .template Case([&](auto unpack) { + .template Case([&](auto unpack) { return bufferizeUnPackOp(rewriter, unpack, options); }) .Default([](auto) { return failure(); }); @@ -643,10 +643,11 @@ void registerBufferizationInterfaces(DialectRegistry ®istry) { IREE::LinalgExt::AttentionOp::attachInterface< LinalgExtOpInterface>(*ctx); }); - registry.addExtension(+[](MLIRContext *ctx, tensor::TensorDialect *dialect) { - tensor::PackOp::attachInterface>( + registry.insert(); + registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { + linalg::PackOp::attachInterface>( *ctx); - tensor::UnPackOp::attachInterface>( + linalg::UnPackOp::attachInterface>( *ctx); }); } diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp index f669b5e0e17b..6264b843bc22 100644 --- a/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp +++ b/compiler/src/iree/compiler/Codegen/Interfaces/PartitionableLoopsInterface.cpp @@ -227,6 +227,10 @@ void registerPartitionableLoopsInterfaceModels(DialectRegistry ®istry) { #define GET_OP_LIST registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { + linalg::PackOp::attachInterface< + OuterParallelAsPartitionableLoops>(*ctx); + linalg::UnPackOp::attachInterface< + OuterParallelAsPartitionableLoops>(*ctx); registerInterfaceForLinalgOps< #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >(ctx); @@ -263,12 +267,8 @@ void registerPartitionableLoopsInterfaceModels(DialectRegistry ®istry) { *ctx); }); registry.addExtension(+[](MLIRContext *ctx, tensor::TensorDialect *dialect) { - tensor::PackOp::attachInterface< - OuterParallelAsPartitionableLoops>(*ctx); tensor::PadOp::attachInterface< OuterParallelAsPartitionableLoops>(*ctx); - tensor::UnPackOp::attachInterface< - OuterParallelAsPartitionableLoops>(*ctx); }); registry.addExtension( +[](MLIRContext *ctx, IREE::GPU::IREEGPUDialect *dialect) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp index 0c52f1ae2b06..0dbc504d7de3 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp @@ -1720,7 +1720,7 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, DispatchLoweringPassPipeline::Mmt4dTilingExpert); } -static bool isPackMatmulLHS(tensor::PackOp op) { +static bool isPackMatmulLHS(linalg::PackOp op) { // linalg.batch_matmul LHS shape if (op.getSourceRank() == 3 && op.getInnerDimsPos().size() == 2 && op.getInnerDimsPos()[0] == 1 && op.getInnerDimsPos()[1] == 2) { @@ -1735,7 +1735,7 @@ static bool isPackMatmulLHS(tensor::PackOp op) { /// configurations and target CPU features. 
static SmallVector getPackVectorTileSizes(mlir::FunctionOpInterface entryPointFn, - tensor::PackOp op) { + linalg::PackOp op) { SmallVector tileSizes(op.getSourceRank(), 1); auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(entryPointFn); int64_t vectorSize = getVectorSize(entryPointFn, op.getSourceType()); @@ -1755,7 +1755,7 @@ getPackVectorTileSizes(mlir::FunctionOpInterface entryPointFn, } static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, - tensor::PackOp op) { + linalg::PackOp op) { assert(!getLoweringConfig(op) && "expected lowering_config is not set"); int srcRank = op.getSourceRank(); @@ -1803,7 +1803,7 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, } static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, - tensor::UnPackOp op) { + linalg::UnPackOp op) { DistributionHeuristicConfig distConfig; distConfig.maxTileSizes.resize(op.getDestRank(), clDefaultDistTileSize); SmallVector distTileSizes = @@ -2629,7 +2629,7 @@ setRootConfigImpl(mlir::FunctionOpInterface entryPointFn, Operation *op, initCPULaunchConfig); }) .Case( [&](auto op) { return setRootConfig(entryPointFn, op); }) .Case &distTileSizes, SmallVector ¶llelVecTileSizes) { @@ -2716,9 +2716,9 @@ adjustTileSizesForPackOp(mlir::FunctionOpInterface entryPointFn, } /// Adjusts the tile sizes (carried by `rootOp`) to be aligned with -/// tensor.unpack inner tile sizes, if there are tensor.unpack producers. If the +/// linalg.unpack inner tile sizes, if there are linalg.unpack producers. If the /// tile sizes are not aligned, a stack buffer is needed because of -/// tensor.unpack tiling implementations. +/// linalg.unpack tiling implementations. static LogicalResult adjustTileSizesForUnPackOp(mlir::FunctionOpInterface entryPointFn, Operation *rootOp) { @@ -2733,7 +2733,7 @@ adjustTileSizesForUnPackOp(mlir::FunctionOpInterface entryPointFn, bool foundUnPackOp = false; SmallVector alignedSizes(linalgOp.getNumLoops(), 1); for (OpOperand *opOperand : linalgOp.getDpsInputOperands()) { - auto unpackOp = opOperand->get().getDefiningOp(); + auto unpackOp = opOperand->get().getDefiningOp(); if (!unpackOp) continue; @@ -2874,7 +2874,7 @@ adjustTileSizesForGenericOp(mlir::FunctionOpInterface entryPointFn, /// ^bb0(%in: f32, %in_2: f32, %out: f32): /// ... /// } -> tensor<384x1024xf32> -/// %pack = tensor.pack %13 +/// %pack = linalg.pack %13 /// inner_dims_pos = [0, 1] /// inner_tiles = [16, 1] /// into %14 : tensor<384x1024xf32> -> tensor<24x1024x16x1xf32> @@ -2944,7 +2944,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, // Given there are 3 generic ops in the dispatch: // %rootOp = linalg.generic {iterator_types = ["reduction", "parallel"]} ... 
// %2 = linalg.generic {iterator_types = ["parallel", "parallel"]} - // %3 = tensor.pack %2 + // %3 = linalg.pack %2 // Assume the distribution and parallel vector tile sizes from %rootOp is: // [[X1, 0], [X2, 0]] // Then the generic op %2 set the missing parallel vector tile sizes on its @@ -2970,7 +2970,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, if (op == rootOperation) continue; - if (auto packOp = dyn_cast(op)) { + if (auto packOp = dyn_cast(op)) { if (failed(adjustTileSizesForPackOp(entryPointFn, packOp, distTileSizes, parallelVecTileSizes))) { return failure(); @@ -3065,7 +3065,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, scalableTileFlagsList.push_back(commonVecScalableTileFlags); bool setUpOK = TypeSwitch(op) - .Case([&](auto packOp) { + .Case([&](auto packOp) { for (auto flags : rootLoweringConfig.getScalableTileFlagVals()) { // TODO: Handle scalable flags @@ -3119,7 +3119,7 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, }); // TODO: (awarzynski) This is effectively tracking the case of - // tensor.pack + scalable flags, which is not support ATM (see TODO + // linalg.pack + scalable flags, which is not support ATM (see TODO // above). Remove once that's implemented. if (!setUpOK) return failure(); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp index 6f15257417ae..6fe11c03ae4f 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUPeel.cpp @@ -36,7 +36,7 @@ void collectLoopsToPeel(Operation *op, .Case([](auto linalgOp) { return linalgOp.getNumLoops(); }) - .Case([](auto packOp) { + .Case([](auto packOp) { return packOp.getSourceRank(); }) .Default([](auto) { return 0; }); @@ -66,7 +66,7 @@ void LLVMCPUPeelPass::runOnOperation() { llvm::SmallSetVector uniqueLoopsToPeel; funcOp.walk([&](Operation *op) { - if (isa(op)) { + if (isa(op)) { LLVM_DEBUG(llvm::dbgs() << "Gather loops to peel from candidate op:\n" << *op << "\n"); collectLoopsToPeel(op, uniqueLoopsToPeel); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir index a44a37000a17..3ec520150259 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/peel.mlir @@ -96,7 +96,7 @@ module { %3 = affine.apply #map1(%arg2) %extracted_slice = tensor.extract_slice %arg0[%3, %arg4] [16, %2] [1, 1] : tensor to tensor<16x?xf32> %extracted_slice_1 = tensor.extract_slice %arg5[%arg2, %arg4, 0, 0] [1, %2, 16, 1] [1, 1, 1, 1] : tensor to tensor<1x?x16x1xf32> - %pack = tensor.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %extracted_slice_1 {lowering_config = #config} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> + %pack = linalg.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %extracted_slice_1 {lowering_config = #config} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> %inserted_slice = tensor.insert_slice %pack into %arg5[%arg2, %arg4, 0, 0] [1, %2, 16, 1] [1, 1, 1, 1] : tensor<1x?x16x1xf32> into tensor scf.yield %inserted_slice : tensor } @@ -108,6 +108,6 @@ module { // CHECK-LABEL: func.func @peel_pack // CHECK: scf.for // CHECK: scf.for -// CHECK: tensor.pack {{.*}} : tensor<16x16xf32> -> tensor<1x16x16x1xf32> +// CHECK: linalg.pack {{.*}} : tensor<16x16xf32> -> tensor<1x16x16x1xf32> // CHECK: scf.for -// 
CHECK: tensor.pack {{.*}} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> +// CHECK: linalg.pack {{.*}} : tensor<16x?xf32> -> tensor<1x?x16x1xf32> diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir index f8827d14338c..823928b08bec 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_pack_unpack_tests.mlir @@ -27,7 +27,7 @@ module { %10 = arith.maximumf %9, %cst_0 : f32 linalg.yield %10 : f32 } -> tensor<384x512xf32> - %pack = tensor.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %2, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } @@ -64,7 +64,7 @@ module { %3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<24x32x16x16xf32> %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %5 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %5 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %5 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> %6 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%4, %unpack : tensor<512xf32>, tensor<384x512xf32>) outs(%5 : tensor<384x512xf32>) { ^bb0(%in: f32, %in_1: f32, %out: f32): %7 = arith.addf %in, %in_1 : f32 @@ -101,7 +101,7 @@ module { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [383, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<383x512xf32> %3 = tensor.empty() : tensor<24x512x16x1xf32> - %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<383x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<383x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir index 6936adef7e01..e8338e89e770 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/pipeline_tests.mlir @@ -407,7 +407,7 @@ func.func @fuse_inputs_reduction() attributes {hal.executable.target = #executab %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [64, 1, 1, 16, 16], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x1x1x16x16xf32> %3 = tensor.empty() : tensor<64x16x16xf32> %4 = linalg.fill ins(%cst : f32) outs(%3 : tensor<64x16x16xf32>) -> tensor<64x16x16xf32> - %unpack = tensor.unpack %2 outer_dims_perm = 
[0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %3 : tensor<64x1x1x16x16xf32> -> tensor<64x16x16xf32> + %unpack = linalg.unpack %2 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 16] into %3 : tensor<64x1x1x16x16xf32> -> tensor<64x16x16xf32> %5 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%unpack : tensor<64x16x16xf32>) outs(%4 : tensor<64x16x16xf32>) { ^bb0(%in: f32, %out: f32): %6 = arith.addf %out, %in : f32 diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir index cba1af4fec2c..faeb04a02d9a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_aarch64_lowering_strategy.mlir @@ -255,7 +255,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_system_ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf32> %3 = tensor.empty() : tensor<4x48x8x1xf32> - %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<20x40xf32> -> tensor<4x48x8x1xf32> + %pack = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %3 : tensor<20x40xf32> -> tensor<4x48x8x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 48, 8, 1], strides = [1, 1, 1, 1] : tensor<4x48x8x1xf32> -> !flow.dispatch.tensor> return } @@ -263,7 +263,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_system_ // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -288,7 +288,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %unpack = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor + %unpack = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor flow.dispatch.tensor.store %unpack, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} return } @@ -296,7 +296,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_outer_dynamic() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -322,7 +322,7 @@ func.func @unpack_fully_dynamic() attributes {hal.executable.target = #executabl %13 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) 
alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%8, %9} %14 = flow.dispatch.tensor.load %12, offsets = [0, 0, 0, 0], sizes = [%6, %7, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%6, %7, %10, %11} -> tensor %15 = tensor.empty(%8, %9) : tensor - %unpack = tensor.unpack %14 inner_dims_pos = [0, 1] inner_tiles = [%10, %11] into %15 : tensor -> tensor + %unpack = linalg.unpack %14 inner_dims_pos = [0, 1] inner_tiles = [%10, %11] into %15 : tensor -> tensor flow.dispatch.tensor.store %unpack, %13, offsets = [0, 0], sizes = [%8, %9], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%8, %9} return } @@ -330,7 +330,7 @@ func.func @unpack_fully_dynamic() attributes {hal.executable.target = #executabl // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_fully_dynamic() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir index fff8747cf504..3c6301d3dcaf 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir @@ -1064,7 +1064,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_embedde %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf32> %3 = tensor.empty() : tensor<2x48x16x1xf32> - %pack = tensor.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf32> -> tensor<2x48x16x1xf32> + %pack = linalg.pack %2 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf32> -> tensor<2x48x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [2, 48, 16, 1], strides = [1, 1, 1, 1] : tensor<2x48x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1073,7 +1073,7 @@ func.func @pack() attributes {hal.executable.target = #executable_target_embedde // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1090,7 +1090,7 @@ func.func @pack_f16() attributes {hal.executable.target = #executable_target_emb %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [20, 40], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<20x40xf16> %3 = tensor.empty() : tensor<2x48x16x1xf16> - %pack = tensor.pack %2 padding_value(%cst : f16) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf16> -> tensor<2x48x16x1xf16> + %pack = linalg.pack %2 padding_value(%cst : f16) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %3 : tensor<20x40xf16> -> tensor<2x48x16x1xf16> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [2, 48, 16, 1], strides = [1, 1, 1, 1] : tensor<2x48x16x1xf16> -> !flow.dispatch.tensor> return } @@ -1099,7 +1099,7 @@ func.func @pack_f16() attributes 
{hal.executable.target = #executable_target_emb // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack_f16() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1115,7 +1115,7 @@ func.func @pack_many_elements() attributes {hal.executable.target = #executable_ %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [1200, 500000], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<1200x500000xf32> %3 = tensor.empty() : tensor<31250x1200x16x1xf32> - %pack = tensor.pack %2 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %3 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> + %pack = linalg.pack %2 outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %3 : tensor<1200x500000xf32> -> tensor<31250x1200x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [31250, 1200, 16, 1], strides = [1, 1, 1, 1] : tensor<31250x1200x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1124,7 +1124,7 @@ func.func @pack_many_elements() attributes {hal.executable.target = #executable_ // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @pack_many_elements() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1148,7 +1148,7 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %5 = tensor.empty() : tensor<24x512x16x1xf32> %6 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %6 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %6 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> %7 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%4, %unpack : tensor<512xf32>, tensor<384x512xf32>) outs(%6 : tensor<384x512xf32>) { ^bb0(%in: f32, %in_1: f32, %out: f32): %8 = arith.addf %in, %in_1 : f32 @@ -1156,7 +1156,7 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i %10 = arith.maximumf %9, %cst_0 : f32 linalg.yield %10 : f32 } -> tensor<384x512xf32> - %pack = tensor.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %7 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %2, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1166,11 +1166,11 @@ func.func @unpack_generic_pack(%arg0: !stream.binding {stream.alignment = 64 : i // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_generic_pack( // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG2]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG2]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = 
#[[CONFIG1]] // ----- @@ -1193,7 +1193,7 @@ func.func @elem_pack() attributes {hal.executable.target = #executable_target_em linalg.yield %6 : f32 } -> tensor<128x384xf32> %5 = tensor.empty() : tensor<16x384x8x1xf32> - %pack = tensor.pack %4 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %5 : tensor<128x384xf32> -> tensor<16x384x8x1xf32> + %pack = linalg.pack %4 inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %5 : tensor<128x384xf32> -> tensor<16x384x8x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [16, 384, 8, 1], strides = [1, 1, 1, 1] : tensor<16x384x8x1xf32> -> !flow.dispatch.tensor> return } @@ -1205,7 +1205,7 @@ func.func @elem_pack() attributes {hal.executable.target = #executable_target_em // CHECK-SAME: translation_info = #[[TRANSLATION]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG1]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG2]] // ----- @@ -1230,7 +1230,7 @@ func.func @transpose_pack() attributes {hal.executable.target = #executable_targ linalg.yield %in : f32 } -> tensor<768x30522xf32> %5 = tensor.empty() : tensor<1908x768x16x1xf32> - %pack = tensor.pack %4 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %5 : tensor<768x30522xf32> -> tensor<1908x768x16x1xf32> + %pack = linalg.pack %4 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %5 : tensor<768x30522xf32> -> tensor<1908x768x16x1xf32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [1908, 768, 16, 1], strides = [1, 1, 1, 1] : tensor<1908x768x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1242,7 +1242,7 @@ func.func @transpose_pack() attributes {hal.executable.target = #executable_targ // CHECK-SAME: translation_info = #[[TRANSLATION]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG1]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG2]] // ----- @@ -1295,7 +1295,7 @@ func.func @reduction_broadcast_pack() attributes {hal.executable.target = #execu %22 = arith.addf %21, %20 : f32 linalg.yield %22 : f32 } -> tensor<384x1024xf32> - %pack = tensor.pack %14 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %9 : tensor<384x1024xf32> -> tensor<24x1024x16x1xf32> + %pack = linalg.pack %14 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %9 : tensor<384x1024xf32> -> tensor<24x1024x16x1xf32> flow.dispatch.tensor.store %pack, %4, offsets = [0, 0, 0, 0], sizes = [24, 1024, 16, 1], strides = [1, 1, 1, 1] : tensor<24x1024x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1313,7 +1313,7 @@ func.func @reduction_broadcast_pack() attributes {hal.executable.target = #execu // CHECK-SAME: lowering_config = #[[CONFIG2]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG3]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG4]] // ----- @@ -1345,7 +1345,7 @@ func.func @reduction_pack() attributes {hal.executable.target = #executable_targ %12 = arith.addf %out, %11 : f32 linalg.yield %12 : f32 } -> tensor<384x1024xf32> - %pack = tensor.pack %9 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x1024xf32> -> tensor<1024x24x16x1xf32> + %pack = linalg.pack %9 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %5 : tensor<384x1024xf32> -> tensor<1024x24x16x1xf32> flow.dispatch.tensor.store %pack, %2, offsets = [0, 0, 0, 0], sizes = [1024, 24, 16, 
1], strides = [1, 1, 1, 1] : tensor<1024x24x16x1xf32> -> !flow.dispatch.tensor> return } @@ -1360,7 +1360,7 @@ func.func @reduction_pack() attributes {hal.executable.target = #executable_targ // CHECK-SAME: lowering_config = #[[CONFIG1]] // CHECK: linalg.generic // CHECK-SAME: lowering_config = #[[CONFIG2]] -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK-SAME: lowering_config = #[[CONFIG3]] // ----- @@ -1377,7 +1377,7 @@ func.func @unpack_static() attributes {hal.executable.target = #executable_targe %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [64, 256, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<64x256x16x16xf32> %3 = tensor.empty() : tensor<1024x4096xf32> - %unpack = tensor.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<64x256x16x16xf32> -> tensor<1024x4096xf32> + %unpack = linalg.unpack %2 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<64x256x16x16xf32> -> tensor<1024x4096xf32> flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [1024, 4096], strides = [1, 1] : tensor<1024x4096xf32> -> !flow.dispatch.tensor> return } @@ -1386,7 +1386,7 @@ func.func @unpack_static() attributes {hal.executable.target = #executable_targe // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_static() // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -1409,7 +1409,7 @@ func.func @unpack_elem() attributes {hal.executable.target = #executable_target_ %4 = flow.dispatch.tensor.load %1, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor> -> tensor<128xf32> %5 = tensor.empty() : tensor<128x384xf32> %6 = tensor.empty() : tensor<384x128xf32> - %unpack = tensor.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %6 : tensor<48x64x8x2xf32> -> tensor<384x128xf32> + %unpack = linalg.unpack %3 inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %6 : tensor<48x64x8x2xf32> -> tensor<384x128xf32> %7 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel"]} ins(%4, %unpack : tensor<128xf32>, tensor<384x128xf32>) outs(%5 : tensor<128x384xf32>) { ^bb0(%in: f32, %in_0: f32, %out: f32): %8 = arith.addf %in, %in_0 : f32 diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir index 0433b677f24b..f0ca16903d17 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/tile-root-fuse-consumer-producer.mlir @@ -66,7 +66,7 @@ func.func @quantized_matmul(%arg0: tensor<2x4x128x16x1xi8>, %arg1: tensor<2x4x16 %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<2x4x688x16x16xf32>) -> tensor<2x4x688x16x16xf32> %6 = linalg.batch_mmt4d {lowering_config = #config2} ins(%1, %3 : tensor<2x4x128x16x1xf32>, tensor<2x688x128x16x1xf32>) outs(%5 : tensor<2x4x688x16x16xf32>) -> tensor<2x4x688x16x16xf32> %7 = tensor.empty() : tensor<2x11008x64xf32> - %unpack = tensor.unpack %6 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 16] into %7 : tensor<2x4x688x16x16xf32> -> tensor<2x11008x64xf32> + %unpack = linalg.unpack %6 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] 
inner_tiles = [16, 16] into %7 : tensor<2x4x688x16x16xf32> -> tensor<2x11008x64xf32> return %unpack : tensor<2x11008x64xf32> } // CHECK: func.func @quantized_matmul( @@ -76,7 +76,7 @@ func.func @quantized_matmul(%arg0: tensor<2x4x128x16x1xi8>, %arg1: tensor<2x4x16 // CHECK: linalg.generic // CHECK: linalg.fill // CHECK: linalg.batch_mmt4d -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp index a8ef7b779f56..c6e0a3c2d11a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp @@ -1637,7 +1637,7 @@ getDefaultWorkgroupTileSizesForPackUnPack(TilingInterface op, static LogicalResult setPackConfig(IREE::GPU::TargetAttr target, mlir::FunctionOpInterface entryPoint, - tensor::PackOp packOp) { + linalg::PackOp packOp) { SmallVector tileSizes = getDefaultWorkgroupTileSizesForPackUnPack( cast(packOp.getOperation()), target.getPreferredSubgroupSize()); @@ -2436,7 +2436,7 @@ static LogicalResult setRootConfig(IREE::GPU::TargetAttr target, LDBG("Winograd Config"); return setWinogradOpConfig(target, entryPointFn, winogradOp); }) - .Case([&](auto packOp) { + .Case([&](auto packOp) { LDBG("Pack Config"); return setPackConfig(target, entryPointFn, packOp); }) @@ -2534,15 +2534,15 @@ LogicalResult initGPULaunchConfig(FunctionOpInterface funcOp) { Operation *rootOperation = nullptr; - // Find the root operation. linalg.generic, linalg.fill, tensor.pack, - // tensor.unpack, and scatter are not root operations if there are other + // Find the root operation. linalg.generic, linalg.fill, linalg.pack, + // linalg.unpack, and scatter are not root operations if there are other // compute operations present. Also, construct a set of generic ops that // are to be skipped. These generic ops that are used to compute scatter // indices are not root operations. llvm::SmallDenseSet genericToSkip; for (Operation *op : llvm::reverse(computeOps)) { if (!isa(op)) { + linalg::PackOp, linalg::UnPackOp>(op)) { rootOperation = op; break; } @@ -2583,7 +2583,7 @@ LogicalResult initGPULaunchConfig(FunctionOpInterface funcOp) { // Pack and unpack ops take priority over scatter and fill ops as the root op. if (!rootOperation) { for (Operation *op : llvm::reverse(computeOps)) { - if (isa(op)) { + if (isa(op)) { rootOperation = op; break; } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp index bc9465ce4d31..ca2fbe1f057a 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUTensorPad.cpp @@ -49,8 +49,8 @@ getPaddedShapeFromTensorLoad(IREE::Flow::DispatchTensorLoadOp tensorLoad, } static FailureOr rewriteAsPaddedOp(IRRewriter &rewriter, - tensor::UnPackOp op, - tensor::UnPackOp &paddedOp) { + linalg::UnPackOp op, + linalg::UnPackOp &paddedOp) { Location loc = op.getLoc(); // Set IP after op because we also take the dims of the original output. @@ -68,7 +68,7 @@ static FailureOr rewriteAsPaddedOp(IRRewriter &rewriter, return failure(); auto paddedShape = *maybePaddedShape; - // Pad to the shape that makes tensor.unpack ops produce full tiles. + // Pad to the shape that makes linalg.unpack ops produce full tiles. 
SmallVector innerTiles = op.getStaticTiles(); ArrayRef dimPos = op.getInnerDimsPos(); for (auto [pos, size] : llvm::zip_equal(dimPos, innerTiles)) { @@ -85,7 +85,7 @@ static FailureOr rewriteAsPaddedOp(IRRewriter &rewriter, SmallVector paddedOperands = {op.getSource(), paddedValue}; paddedOperands.append(op.getInnerTiles().begin(), op.getInnerTiles().end()); - paddedOp = rewriter.create( + paddedOp = rewriter.create( loc, TypeRange{paddedValue.getType()}, paddedOperands, op->getAttrs()); // Slice out the original shape from the padded result to pass on to @@ -154,8 +154,8 @@ struct LLVMGPUTensorPadPass final rewriter.replaceOp(linalgOp, newResults); }); - funcOp.walk([&](tensor::UnPackOp unpackOp) { - tensor::UnPackOp paddedOp; + funcOp.walk([&](linalg::UnPackOp unpackOp) { + linalg::UnPackOp paddedOp; FailureOr newResult = rewriteAsPaddedOp(rewriter, unpackOp, paddedOp); if (failed(newResult)) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp index 4783036a7172..53c9cd3ed7be 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp @@ -333,12 +333,12 @@ static void addGPUBufferizePasses(OpPassManager &funcPassManager) { /// op is a PackOp with a DispatchTensorLoadOp producer, or an UnPackOp with /// only DispatchTensorStoreOp consumers. LogicalResult isAtBoundary(Operation *op) { - if (isa(op)) { + if (isa(op)) { if (isa_and_nonnull( op->getOperand(0).getDefiningOp())) { return success(); } - } else if (isa(op)) { + } else if (isa(op)) { if (llvm::all_of(op->getUsers(), [](Operation *user) { return isa(user); })) { diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir index 5901999d5adb..e78b11ba20de 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir @@ -525,7 +525,7 @@ func.func @scatter_as_root_op(%arg0: tensor<4x?xi64>, func.func @set_encoding_gpu(%0 : tensor<1234x567xi8>) -> tensor<10x9x8x4x4x4x2x8xi8> { %c0_i8 = arith.constant 0 : i8 %22 = tensor.empty() : tensor<10x9x128x64xi8> - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [128, 64] into %22 : tensor<1234x567xi8> -> tensor<10x9x128x64xi8> %expanded = tensor.expand_shape %pack [[0], [1], [2, 3, 4], [5, 6, 7]] @@ -565,7 +565,7 @@ func.func @unset_encoding_gpu(%arg0: tensor<10x5x4x8x2x4x16x4xi32>) -> tensor<12 %collapsed = tensor.collapse_shape %transposed [[0], [1], [2, 3, 4], [5, 6, 7]] : tensor<10x5x4x8x4x4x16x2xi32> into tensor<10x5x128x128xi32> %1 = tensor.empty() : tensor<1234x567xi32> - %unpack = tensor.unpack %collapsed + %unpack = linalg.unpack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [128, 128] into %1 : tensor<10x5x128x128xi32> -> tensor<1234x567xi32> return %unpack : tensor<1234x567xi32> @@ -591,7 +591,7 @@ func.func @pack_dynamic_producer(%arg0: tensor, %d0: index, %d1: index, linalg.yield %in : i8 } -> tensor %init1 = tensor.empty(%d2, %d3) : tensor - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %init1 : tensor -> tensor return %pack : tensor @@ -617,7 
+617,7 @@ func.func @pack_full_tile(%arg0: tensor<32x32xi8>) -> tensor<1x1x32x32xi8> { linalg.yield %in : i8 } -> tensor<32x32xi8> %init1 = tensor.empty() : tensor<1x1x32x32xi8> - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %init1 : tensor<32x32xi8> -> tensor<1x1x32x32xi8> return %pack : tensor<1x1x32x32xi8> @@ -643,7 +643,7 @@ func.func @pack_dynamic_tile(%arg0: tensor<32x32xi8>, %d0: index, %d1: index, %t linalg.yield %in : i8 } -> tensor<32x32xi8> %init1 = tensor.empty(%d0, %d1, %tile0, %tile1) : tensor - %pack = tensor.pack %0 padding_value(%c0_i8 : i8) + %pack = linalg.pack %0 padding_value(%c0_i8 : i8) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [%tile0, %tile1] into %init1 : tensor<32x32xi8> -> tensor return %pack : tensor diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir index 642c6ed1a179..efac8f5670ca 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir @@ -455,7 +455,7 @@ func.func @dynamic_pack_2x2() { %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0], sizes = [%4, %5], strides = [1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %pack = tensor.pack %10 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %11 : tensor -> tensor + %pack = linalg.pack %10 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %11 : tensor -> tensor flow.dispatch.tensor.store %pack, %9, offsets = [0, 0, 0, 0], sizes = [%6, %7, 2, 2], strides = [1, 1, 1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} return } @@ -464,7 +464,7 @@ func.func @dynamic_pack_2x2() { // SM80-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // SM80: func.func @dynamic_pack_2x2() // SM80-SAME: translation_info = #[[TRANSLATION]] -// SM80: tensor.pack +// SM80: linalg.pack // SM80-SAME: lowering_config = #[[CONFIG]] // ----- diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir index bb7722c3086b..8cdd5223dbb8 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/pack_pipeline_test.mlir @@ -10,7 +10,7 @@ func.func @static_pack() { %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor> %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<128x256xi32> %3 = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %2 inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %3 : tensor<128x256xi32> -> tensor<4x16x16x32xi32> + %pack = linalg.pack %2 inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %3 : tensor<128x256xi32> -> tensor<4x16x16x32xi32> flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 16, 16, 32], strides = [1, 1, 1, 1] : tensor<4x16x16x32xi32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir 
index 48fc842231ed..b49f474debab 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/tensor_pad.mlir @@ -114,7 +114,7 @@ func.func @unpack_dynamic() { %c0_i32 = arith.constant 0 : i32 %22 = arith.subi %c16, %12 : index %23 = arith.subi %c16, %15 : index - %unpack = tensor.unpack %21 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %16 : tensor -> tensor + %unpack = linalg.unpack %21 inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %16 : tensor -> tensor flow.dispatch.tensor.store %unpack, %9, offsets = [%arg0, %arg1], sizes = [%12, %15], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} } } @@ -124,6 +124,6 @@ func.func @unpack_dynamic() { // CHECK: %[[DEST_BUF:.+]] = hal.interface.binding.subspan layout({{.+}}) binding(1) // CHECK: %[[LOAD:.+]] = flow.dispatch.tensor.load %[[DEST_BUF]] // CHECK: %[[PAD:.+]] = tensor.pad %[[LOAD]] -// CHECK: %[[UNPACK:.+]] = tensor.unpack {{.+}} into %[[PAD]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack {{.+}} into %[[PAD]] // CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[UNPACK]] // CHECK: flow.dispatch.tensor.store %[[SLICE]], %[[DEST_BUF]] diff --git a/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp index eafb5cf83721..072f6afd0742 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/CPUUtils.cpp @@ -34,7 +34,7 @@ FailureOr getRootOperation(ArrayRef computeOps) { } if (isa(op) && - !isa(op)) { + !isa(op)) { // All other operations that implement this interface are root ops. rootOperation = op; break; @@ -54,7 +54,7 @@ FailureOr getRootOperation(ArrayRef computeOps) { if (!rootOperation) { // Check for pad/pack/unpack ops by themselves. for (auto op : llvm::reverse(computeOps)) { - if (isa(op)) { + if (isa(op)) { rootOperation = op; break; } diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp index 3f0c42b7a4de..f7287ed1d1d2 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp @@ -1605,7 +1605,7 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { std::optional vscaleRange; if (!opResult) { // Note: Inferring scalable sizes is not supported is `opResult` is set - // (which is used to compute sizes for tensor.pack/unpack). + // (which is used to compute sizes for linalg.pack/unpack). 
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(linalgOp); vscaleRange = getDefaultVscaleRange(targetAttr); } @@ -1672,7 +1672,7 @@ inferSizesFromIR(linalg::LinalgOp linalgOp, std::optional opResult) { return result; } -std::optional inferSizesFromIR(tensor::PackOp op) { +std::optional inferSizesFromIR(linalg::PackOp op) { LLVM_DEBUG(llvm::dbgs() << "Inferring dest sizes for:\n" << op << "\n"); if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { @@ -1715,7 +1715,7 @@ std::optional inferSizesFromIR(tensor::PackOp op) { return result; } -std::optional inferSizesFromIR(tensor::UnPackOp op) { +std::optional inferSizesFromIR(linalg::UnPackOp op) { LLVM_DEBUG(llvm::dbgs() << "Inferring dest sizes for:\n" << op << "\n"); if (llvm::any_of(op.getInnerTiles(), [](OpFoldResult v) { @@ -1766,7 +1766,7 @@ std::optional inferSizesFromIR(Value val) { TypeSwitch(val.getDefiningOp()) .Case( [&](auto op) { result = inferSizesFromIR(op, cast(val)); }) - .Case([&](auto op) { result = inferSizesFromIR(op); }) + .Case([&](auto op) { result = inferSizesFromIR(op); }) .Case([&](tensor::ExtractSliceOp op) { // tensor::ExtractSliceOp is not vectorizable, so only `destShape` has // the values. diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.h b/compiler/src/iree/compiler/Codegen/Utils/Utils.h index ea83c9f8de4d..18427690d1f9 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/Utils.h +++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.h @@ -267,15 +267,15 @@ struct VectorizationTileSizes { /// chain. std::optional inferSizesFromIR(Value val); -/// Returns the result sizes and vector input sizes of the tensor.unpack op. The +/// Returns the result sizes and vector input sizes of the linalg.unpack op. The /// inferred bounding size is returned if it is dynamic shape. Returns /// std::nullopt if the shape inference failed. -std::optional inferSizesFromIR(tensor::UnPackOp op); +std::optional inferSizesFromIR(linalg::UnPackOp op); -/// Returns the result sizes and vector input sizes of the tensor.pack op. The +/// Returns the result sizes and vector input sizes of the linalg.pack op. The /// inferred bounding size is returned if it is dynamic shape. Returns /// std::nullopt if the shape inference failed. -std::optional inferSizesFromIR(tensor::PackOp op); +std::optional inferSizesFromIR(linalg::PackOp op); /// Tries to infer the vector sizes from an IR using ValueBounds analysis. If /// `opResult` is provided, it stores the bounded result shapes to destShape. 
diff --git a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir index ecc662357740..c8f40e490cf4 100644 --- a/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/VMVX/test/select_lowering_strategy.mlir @@ -169,7 +169,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl %9 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c131072) : !flow.dispatch.tensor>{%6, %7} %10 = flow.dispatch.tensor.load %8, offsets = [0, 0, 0, 0], sizes = [%4, %5, 32, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor>{%4, %5} -> tensor %11 = tensor.empty(%6, %7) : tensor - %unpack = tensor.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor + %unpack = linalg.unpack %10 inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %11 : tensor -> tensor flow.dispatch.tensor.store %unpack, %9, offsets = [0, 0], sizes = [%6, %7], strides = [1, 1] : tensor -> !flow.dispatch.tensor>{%6, %7} return } @@ -178,7 +178,7 @@ func.func @unpack_outer_dynamic() attributes {hal.executable.target = #executabl // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info // CHECK: func.func @unpack_outer_dynamic // CHECK-SAME: translation_info = #[[TRANSLATION]] -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK-SAME: lowering_config = #[[CONFIG]] // ----- @@ -210,7 +210,7 @@ func.func @elem_pack_ukernels() attributes {hal.executable.target = #executable_ %9 = affine.apply #map()[%8#0] %10 = affine.apply #map1()[%8#1] %11 = tensor.empty(%9, %10, %8#0, %8#1) : tensor - %pack = tensor.pack %7 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [%8#0, %8#1] into %11 : tensor<1024x2048xf32> -> tensor + %pack = linalg.pack %7 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [%8#0, %8#1] into %11 : tensor<1024x2048xf32> -> tensor %12:2 = iree_codegen.query_tile_sizes tensor<1024x2048xf32, #iree_encoding.encoding> -> index, index %13 = affine.apply #map()[%12#0] %14 = affine.apply #map1()[%12#1] diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp index ad93878b83d2..03028fb2220e 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/AnnotateDispatches.cpp @@ -381,7 +381,7 @@ static std::string summarizeDispatchRegion(Region ®ion) { << "', cost: " << bestEstimatedCost << "\n"); }) .Case([&](auto op) { + linalg::PackOp, linalg::UnPackOp>([&](auto op) { // SetEncoding/UnsetEncoding/PackOp/UnPackOp is the bestOp only if // there are no other operations. int64_t estimatedCost = kMinEstimatedCost + 1; @@ -429,7 +429,7 @@ static std::string summarizeDispatchRegion(Region ®ion) { [&](auto op) { bestSummary = summarizeLinalgExtOp(op); }) .Case( [&](auto op) { bestSummary = summarizeLinalgOp(op); }) - .Case([&](auto op) { + .Case([&](auto op) { auto opName = getOpNameWithoutDialectName(op); bestSummary = opName + "_" + operandTypeToString(op.getSource()); }) @@ -462,12 +462,12 @@ static std::string summarizeDispatchRegion(Region ®ion) { // Add heuristic hint to dispatch name if the unpack op is the first op and // the pack op is the last op. 
if (!tileableOps.empty()) { - if (!isa(bestOp) && - isa(tileableOps.front())) { + if (!isa(bestOp) && + isa(tileableOps.front())) { bestSummary = "unpack_" + bestSummary; } - if (!isa(bestOp) && - isa(tileableOps.back())) { + if (!isa(bestOp) && + isa(tileableOps.back())) { bestSummary = bestSummary + "_pack"; } } diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp index 0e340c4e69b5..294d44325edd 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/InitializeEmptyTensors.cpp @@ -35,7 +35,7 @@ static FailureOr getZero(OpBuilder &builder, Location loc, static bool shouldBeConvertedToFlowTensorOp(tensor::EmptyOp emptyTensorOp) { return !(llvm::all_of(emptyTensorOp->getUsers(), llvm::IsaPred) || + linalg::PackOp, linalg::UnPackOp>) || emptyTensorOp->getParentOfType()); } diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir index c795817b0e75..b4f4becc6323 100644 --- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir +++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/annotate_dispatches.mlir @@ -184,7 +184,7 @@ flow.executable private @ex { func.func @ex(%arg0: !flow.dispatch.tensor>, %arg1: !flow.dispatch.tensor>) { %0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : !flow.dispatch.tensor> -> tensor<384x512xf32> %1 = tensor.empty() : tensor<24x512x16x1xf32> - %pack = tensor.pack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %1 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %1 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %arg1, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } @@ -200,7 +200,7 @@ flow.executable private @ex { func.func @ex(%arg0: !flow.dispatch.tensor>, %arg1: !flow.dispatch.tensor>) { %0 = flow.dispatch.tensor.load %arg0, offsets = [0, 0, 0, 0], sizes = [24, 32, 16, 16], strides = [1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<24x32x16x16xf32> %1 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %1 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %1 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> flow.dispatch.tensor.store %unpack, %arg1, offsets = [0, 0], sizes = [384, 512], strides = [1, 1] : tensor<384x512xf32> -> !flow.dispatch.tensor> return } @@ -220,13 +220,13 @@ flow.executable private @ex { %1 = flow.dispatch.tensor.load %arg1, offsets = [0], sizes = [512], strides = [1] : !flow.dispatch.tensor> -> tensor<512xf32> %2 = tensor.empty() : tensor<24x512x16x1xf32> %3 = tensor.empty() : tensor<384x512xf32> - %unpack = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> + %unpack = linalg.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<24x32x16x16xf32> -> tensor<384x512xf32> %4 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%1, %unpack : tensor<512xf32>, 
tensor<384x512xf32>) outs(%3 : tensor<384x512xf32>) { ^bb0(%in: f32, %in_0: f32, %out: f32): %5 = arith.addf %in, %in_0 : f32 linalg.yield %5 : f32 } -> tensor<384x512xf32> - %pack = tensor.pack %4 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %2 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> + %pack = linalg.pack %4 inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %2 : tensor<384x512xf32> -> tensor<24x512x16x1xf32> flow.dispatch.tensor.store %pack, %arg2, offsets = [0, 0, 0, 0], sizes = [24, 512, 16, 1], strides = [1, 1, 1, 1] : tensor<24x512x16x1xf32> -> !flow.dispatch.tensor> return } diff --git a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp index b2de618764fd..8ac73b8accc2 100644 --- a/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/FormDispatchRegions.cpp @@ -110,11 +110,11 @@ static void removeFusionGroupsAttribute(Operation *op) { //===----------------------------------------------------------------------===// /// Returns true if the reduced dimensions in the linalgOp of the unpack result -/// are not unpacked by the producer tensor::UnPackOp. This means the reduced +/// are not unpacked by the producer linalg::UnPackOp. This means the reduced /// dimensions of the unpack result are not part of the inner_dims_pos. static bool hasNoPackedReductionDimensions(linalg::LinalgOp linalgOp, Operation *producer) { - auto unpack = dyn_cast(producer); + auto unpack = dyn_cast(producer); if (!unpack) { return false; } @@ -148,7 +148,7 @@ static bool hasNoPackedReductionDimensions(linalg::LinalgOp linalgOp, /// Returns true if the linalgOp is fusable with an unpack producer static bool hasFusableUnpackProducer(linalg::LinalgOp linalgOp) { return llvm::any_of(linalgOp->getOperands(), [&](Value operand) { - auto producer = operand.getDefiningOp(); + auto producer = operand.getDefiningOp(); return producer && hasNoPackedReductionDimensions(linalgOp, producer); }); } @@ -173,27 +173,27 @@ static bool isRootOp(Operation *op) { return !isa(op); } if (isa(op)) { - return !isa(op); + return !isa(op); } - return isa(op); + return isa(op); } /// Returns true if the operation is a `pack` op or a `set_encoding` op that /// has pack semantics. // TODO(ravishankarm): This seems like a use case for an interface. static bool isPackLikeOp(Operation *op) { - return isa(op); + return isa(op); } /// Returns true if the operation is an `unpack` op or an `unset_encoding` op. static bool isUnpackLikeOp(Operation *op) { - return isa(op); + return isa(op); } /// Since `iree_encoding.set_encoding` doesnt have padding semantics a /// `tensor.pad` is introduced to get the shapes of the input and output to /// match. The `tensor.pad` -> `set_encoding` can be folded later on into a -/// single `tensor.pack` operation. But it means the fusion has to try to keep +/// single `linalg.pack` operation. But it means the fusion has to try to keep /// these in the same dispatch. // TODO(ravishankarm): Maybe make `set_encoding` have pad semantics that can be // explicitly broken down if needed. @@ -648,7 +648,7 @@ isFusableWithProducer(OpOperand &operand, return TypeSwitch(producer) .Case([&](auto padOp) { return true; }) .Case([&](auto linalgOp) { - if (auto packOp = dyn_cast(consumer)) { + if (auto packOp = dyn_cast(consumer)) { // TODO(#12746): fusion of pack with dynamic inner tile size // causes an error in backend. Disable for now. 
if (!packOp.getInnerTiles().empty()) { @@ -777,7 +777,7 @@ decideFusableLinalgOps(Region ®ion, DominanceInfo const &dominanceInfo, // to convert them to splats. Also avoid moving dequantization-like ops // into their own dispatch since it is better to clone these ops and avoid // materializing large tensors between dispatches. - if (!isa(op) || IREE::Flow::isClonableIntoDispatchOp(&op, clonableOptions)) { continue; diff --git a/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp b/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp index 16e5a32a0e1b..8173d9b07b56 100644 --- a/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/FormScalarDispatches.cpp @@ -56,7 +56,7 @@ static bool isComputeOperation(Operation *op) { } if (op->getDialect() == context->getLoadedDialect()) { return !isa(op); + tensor::ExpandShapeOp, linalg::PackOp, linalg::UnPackOp>(op); } return false; } diff --git a/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp b/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp index 8ef1c4e53299..19ac81c3bb80 100644 --- a/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/FusionPreprocessing.cpp @@ -160,7 +160,7 @@ struct FusionPreprocessingPass final // operand shapes. memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns); memref::populateResolveShapedTypeResultDimsPatterns(patterns); - tensor::populateFoldIntoPackAndUnpackPatterns(patterns); + linalg::populateFoldIntoPackAndUnpackPatterns(patterns); if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) { return signalPassFailure(); } diff --git a/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp b/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp index 416dc5ec17b7..1d67e5cfe531 100644 --- a/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/SinkReshapes.cpp @@ -50,7 +50,7 @@ struct SinkReshapesPass final static bool isFusableUsingTileAndFuse(Operation *producer, Operation *consumer) { return llvm::isa_and_nonnull(producer); } diff --git a/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp b/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp index 28927ecc86a4..a9c3caec84e0 100644 --- a/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp +++ b/compiler/src/iree/compiler/DispatchCreation/TensorPadToTensorInsertSlice.cpp @@ -65,7 +65,7 @@ struct TensorPadOpConversion : public OpRewritePattern { return failure(); } } - // (pad + set_encoding) gets folded in to tensor.pack in the + // (pad + set_encoding) gets folded in to linalg.pack in the // MaterializeEncoding pass. Rewriting those pads into insert_slice would // defeat that. 
if (isa(use)) { diff --git a/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir b/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir index 377c91b6a054..484d446b99ef 100644 --- a/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir +++ b/compiler/src/iree/compiler/DispatchCreation/test/collapse_dimensions.mlir @@ -34,7 +34,7 @@ util.func public @do_not_collapse_cst_in_place(%arg0: tensor<1x1x2304xf32>) { util.func public @unpack_collapse(%arg0: tensor<2x320x128x128xf32>, %arg1: tensor<320xf32>, %arg2: tensor<320xf32>, %arg3: tensor<1x5x2x64xf32>) -> tensor<2x320x128x128xf16> { %dispatch = flow.dispatch.region -> (tensor<2x320x128x128xf16>) { %0 = tensor.empty() : tensor<2x320xf32> - %unpack = tensor.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %0 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> + %unpack = linalg.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %0 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> %1 = tensor.empty() : tensor<2x320x128x128xf16> %2 = linalg.generic { indexing_maps = [#map, #map1, #map2, #map1, #map], @@ -83,7 +83,7 @@ util.func public @unpack_elementwise_collapse(%arg0: tensor<2x320x128x128xf32>, linalg.yield %22 : f32 } -> tensor<2x320x128x128xf32> - %unpack = tensor.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> + %unpack = linalg.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> %3 = linalg.generic {indexing_maps = [#map, #map1, #map2, #map1, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%elementwise, %arg1, %unpack, %arg2 : tensor<2x320x128x128xf32>, tensor<320xf32>, tensor<2x320xf32>, tensor<320xf32>) outs(%2 : tensor<2x320x128x128xf16>) { ^bb0(%in: f32, %in_0: f32, %in_1: f32, %in_2: f32, %out: f16): @@ -133,7 +133,7 @@ util.func public @prevent_collapse(%arg0: tensor<2x320x128x128xf32>, %arg1: tens } -> tensor<2x320x128x128xf32> %barrier = util.optimization_barrier %elementwise : tensor<2x320x128x128xf32> - %unpack = tensor.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> + %unpack = linalg.unpack %arg3 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 64] into %1 : tensor<1x5x2x64xf32> -> tensor<2x320xf32> %3 = linalg.generic {indexing_maps = [#map, #map1, #map2, #map1, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%barrier, %arg1, %unpack, %arg2 : tensor<2x320x128x128xf32>, tensor<320xf32>, tensor<2x320xf32>, tensor<320xf32>) outs(%2 : tensor<2x320x128x128xf16>) { ^bb0(%in: f32, %in_0: f32, %in_1: f32, %in_2: f32, %out: f16): diff --git a/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir b/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir index 33ef6f9ed76f..a848e366d547 100644 --- a/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir +++ b/compiler/src/iree/compiler/DispatchCreation/test/dispatch_region_formation_preprocessing.mlir @@ -524,7 +524,7 @@ util.func public @fix_issue_16835(%arg0: tensor<49x6x16x16xf32>, %arg1: tensor<9 %cst_2 = arith.constant 0.166666672 : f32 %0 = tensor.empty() : tensor<784x96xf32> %1 = tensor.empty() : 
tensor<784x96xf32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<49x6x16x16xf32> -> tensor<784x96xf32> + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<49x6x16x16xf32> -> tensor<784x96xf32> %2 = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} ins(%unpack, %arg1 : tensor<784x96xf32>, tensor<96xf32>) outs(%1 : tensor<784x96xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %3 = arith.addf %in, %in_3 : f32 @@ -539,7 +539,7 @@ util.func public @fix_issue_16835(%arg0: tensor<49x6x16x16xf32>, %arg1: tensor<9 util.return %expanded : tensor<28x28x96xf32> } // CHECK-LABEL: util.func public @fix_issue_16835 -// CHECK: tensor.unpack +// CHECK: linalg.unpack // CHECK: linalg.generic // CHECK: tensor.expand_shape diff --git a/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir b/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir index 4a1b808a9127..6f5abce51feb 100644 --- a/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir +++ b/compiler/src/iree/compiler/DispatchCreation/test/form_dispatch_regions.mlir @@ -24,7 +24,7 @@ util.func public @pack_elementwise_fusion(%arg0 : tensor, %8 = tensor.empty(%6, %7) : tensor // TODO(#12746) : The inner_tiles could be dynamic here. It is disabled // due to unrelated codegen issue. - %9 = tensor.pack %5 padding_value(%cst : f32) + %9 = linalg.pack %5 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %8 : tensor -> tensor util.return %9 : tensor @@ -39,7 +39,7 @@ util.func public @pack_elementwise_fusion(%arg0 : tensor, // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG1]], %[[ARG0]] : -// CHECK: %[[PACK:.+]] = tensor.pack %[[GENERIC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[GENERIC]] // CHECK: flow.return %[[PACK]] // CHECK: util.return %[[RETURN]] @@ -80,7 +80,7 @@ util.func public @pack_fusion(%arg0 : tensor, %8 = tensor.empty(%6, %7) : tensor // TODO(#12746) : The inner_tiles could be dynamic here. It is disabled // due to unrelated codegen issue. 
- %9 = tensor.pack %5 padding_value(%cst : f32) + %9 = linalg.pack %5 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %8 : tensor -> tensor util.return %9 : tensor @@ -95,7 +95,7 @@ util.func public @pack_fusion(%arg0 : tensor, // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG1]], %[[REDUCTION]] : -// CHECK: %[[PACK:.+]] = tensor.pack %[[GENERIC]] +// CHECK: %[[PACK:.+]] = linalg.pack %[[GENERIC]] // CHECK: flow.return %[[PACK]] // CHECK: util.return %[[RETURN]] @@ -119,7 +119,7 @@ util.func public @tranpose_pack_fusion(%arg0: tensor) -> tensor - %pack = tensor.pack %1 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %4 : tensor -> tensor + %pack = linalg.pack %1 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %4 : tensor -> tensor util.return %pack : tensor } // No fusion as the CPU backend currently can't handle fusion with transpose @@ -131,7 +131,7 @@ util.func public @tranpose_pack_fusion(%arg0: tensor) -> tensor (s0 * s1)>()[%d0, %d2] %folded_dim1 = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%d1, %d3] %dest = tensor.empty(%folded_dim0, %folded_dim1) : tensor - %0 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [%d2, %d3] + %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [%d2, %d3] into %dest : tensor -> tensor %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, @@ -370,7 +370,7 @@ util.func public @unpack_elementwise_fusion( // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-SAME: %[[ARG1:.+]]: tensor) // CHECK: %[[RESULT:.+]] = flow.dispatch.region -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[UNPACK]], %[[ARG1]] // CHECK: flow.return %[[GENERIC]] @@ -406,7 +406,7 @@ util.func public @unpack_non_intersecting_reduction( %folded_dim = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%d1, %d2] %dest0 = tensor.empty(%d0, %folded_dim) : tensor %dest1 = tensor.empty(%folded_dim) : tensor - %0 = tensor.unpack %arg0 inner_dims_pos = [1] inner_tiles = [%d2] + %0 = linalg.unpack %arg0 inner_dims_pos = [1] inner_tiles = [%d2] into %dest0 : tensor -> tensor %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, @@ -426,7 +426,7 @@ util.func public @unpack_non_intersecting_reduction( // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-SAME: %[[ARG1:.+]]: tensor) // CHECK: %[[RESULT:.+]] = flow.dispatch.region -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[UNPACK]], %[[ARG1]] // CHECK: flow.return %[[GENERIC]] @@ -1014,7 +1014,7 @@ util.func @scatter_index_producer_fusion(%arg0 : tensor, util.func @move_captured_from_above_ops(%arg0 : tensor<1x1x2x4xi32>, %arg1 : f64, %arg2 : f64) -> tensor<2x3xi8> { %empty = tensor.empty() : tensor<2x3xi32> - %unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1] + %unpack = linalg.unpack %arg0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %empty : tensor<1x1x2x4xi32> -> tensor<2x3xi32> %0 = arith.mulf %arg1, %arg2 : f64 %1 = tensor.empty() : tensor<2x3xi8> @@ -1034,7 +1034,7 @@ util.func @move_captured_from_above_ops(%arg0 : tensor<1x1x2x4xi32>, // CHECK-LABEL: func public @move_captured_from_above_ops // CHECK: %[[OP:.+]] = arith.mulf // CHECK: %[[DISPATCH:.+]] = 
flow.dispatch.region -// CHECK: %[[UNPACK:.+]] = tensor.unpack +// CHECK: %[[UNPACK:.+]] = linalg.unpack // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[UNPACK]] : // CHECK: %[[TRUNCF:.+]] = arith.truncf %[[OP]] diff --git a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp index e2778cc8e2cc..2915fddcce52 100644 --- a/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp +++ b/compiler/src/iree/compiler/ExternalInterfaces/UtilExternalModels.cpp @@ -510,6 +510,9 @@ void registerUtilExternalModels(DialectRegistry ®istry) { #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgOps.cpp.inc" >::registerOpInterface(context); + + AlwaysHoistableOpInterfaceHelper< + linalg::PackOp, linalg::UnPackOp>::registerOpInterface(context); }); // Register hoistable type interfaces for tensor ops. registry.addExtension( @@ -521,9 +524,8 @@ void registerUtilExternalModels(DialectRegistry ®istry) { tensor::ExtractSliceOp>::registerOpInterface(context); // Cases of trivial pack/unpack should be handled as canonicalizations // before we get here, thus we're safe to always hoist. - AlwaysHoistableOpInterfaceHelper< - tensor::PadOp, tensor::PackOp, - tensor::UnPackOp>::registerOpInterface(context); + AlwaysHoistableOpInterfaceHelper::registerOpInterface( + context); }); registry.addExtension( +[](MLIRContext *context, IREE::Util::UtilDialect *dialect) { diff --git a/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp b/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp index e01d71dd0d47..267c4f34ff47 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/DataLayoutPropagation.cpp @@ -27,10 +27,10 @@ struct DataLayoutPropagationPass patterns, [](OpOperand *opOperand) { Operation *producer = opOperand->get().getDefiningOp(); Operation *consumer = opOperand->getOwner(); - if (isa(consumer)) { + if (isa(consumer)) { return isa(producer); } - if (isa(producer)) { + if (isa(producer)) { return isa(consumer); } return false; diff --git a/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp b/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp index 0489448d9b0a..cbbbe5f4880c 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp +++ b/compiler/src/iree/compiler/GlobalOptimization/GlobalLoopInvariantCodeMotion.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/GlobalOptimization/Passes.h" #include "llvm/ADT/TypeSwitch.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SCF/Transforms/Transforms.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" @@ -30,7 +31,7 @@ static bool isHoistableOp(LoopLikeOpInterface loopOp, Operation *op, } // Check if the op type is hoistable. 
-  if (!isa<tensor::PackOp, tensor::UnPackOp>(op)) {
+  if (!isa<linalg::PackOp, linalg::UnPackOp>(op)) {
     return false;
   }
diff --git a/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp b/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp
index 86d4135c636e..69a61842f7e8 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/SimplifyPackUnpack.cpp
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #include "iree/compiler/GlobalOptimization/Passes.h"
-#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -25,7 +25,7 @@ struct SimplifyPackUnpackPass
 void SimplifyPackUnpackPass::runOnOperation() {
   MLIRContext *context = &getContext();
   RewritePatternSet patterns(context);
-  tensor::populateSimplifyPackAndUnpackPatterns(patterns);
+  linalg::populateSimplifyPackAndUnpackPatterns(patterns);
   if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) {
     return signalPassFailure();
   }
 }
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir
index 556cfedf8fee..d05143897c03 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/data_layout_propagation.mlir
@@ -3,7 +3,7 @@
 func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) -> tensor {
   %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor
   %2 = tensor.empty(%dim) : tensor
-  %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor
+  %pack = linalg.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor
   func.return %pack : tensor
 }
 // CHECK-LABEL: func.func @bubble_up_pack_through_collapse
@@ -12,7 +12,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index)
 // CHECK: %[[C0:.+]] = arith.constant 0 : index
 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor
 // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor
-// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor
+// CHECK: %[[PACK:.+]] = linalg.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor
 // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor
 // CHECK: return %[[COLLAPSED]] : tensor
@@ -20,7 +20,7 @@ func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index)
 func.func @push_down_unpack_through_expand(%5: tensor, %dim: index, %1 : index) -> tensor {
   %6 = tensor.empty(%dim) : tensor
-  %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor
+  %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor
   %expanded = tensor.expand_shape %unpack [[0, 1], [2]] output_shape [%1, 256, 256]: tensor into tensor
   func.return %expanded : tensor
 }
@@ -31,5 +31,5 @@ func.func @push_down_unpack_through_expand(%5: tensor, %dim: index
 // CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]]
output_shape {{.*}} : tensor into tensor // CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor -// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor // CHECK: return %[[UNPACK]] : tensor diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir index 9af32a090c76..2d5e95b6913f 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir +++ b/compiler/src/iree/compiler/GlobalOptimization/test/global_loop_invariant_code_motion.mlir @@ -11,7 +11,7 @@ func.func @hoist_pack_op_with_zero_trip_check(%bound : i32, %src : tensor<100x10 } do { ^bb0(%arg1: i32, %arg2: tensor<13x13x8x8xf32>): %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %arg2, %pack : tensor<13x13x8x8xf32> %next = arith.addi %arg1, %cst1 : i32 scf.yield %next, %add : i32, tensor<13x13x8x8xf32> @@ -28,7 +28,7 @@ func.func @hoist_pack_op_with_zero_trip_check(%bound : i32, %src : tensor<100x10 // CHECK: %[[PRECOND:.+]] = arith.cmpi slt, %[[C0]], %[[BOUND]] : i32 // CHECK: %[[RES:.+]]:2 = scf.if %[[PRECOND]] -> (i32, tensor<13x13x8x8xf32>) { // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<13x13x8x8xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] padding_value(%[[PAD]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %[[DEST]] // CHECK-SAME: : tensor<100x100xf32> -> tensor<13x13x8x8xf32> // CHECK: %[[LOOP:.+]]:2 = scf.while (%[[ARG2:.+]] = %[[C0]], %[[ARG3:.+]] = %[[INIT]]) @@ -56,7 +56,7 @@ func.func @hoist_pack_op_from_do_while(%bound : i32, %src : tensor<100x100xf32>) %init = arith.constant dense<0.0> : tensor<13x13x8x8xf32> %res:2 = scf.while (%iter = %cst0, %val = %init) : (i32, tensor<13x13x8x8xf32>) -> (i32, tensor<13x13x8x8xf32>) { %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %val, %pack : tensor<13x13x8x8xf32> %next = arith.addi %iter, %cst1 : i32 %cond = arith.cmpi slt, %next, %bound : i32 @@ -76,7 +76,7 @@ func.func @hoist_pack_op_from_do_while(%bound : i32, %src : tensor<100x100xf32>) // CHECK-DAG: %[[INIT:.+]] = arith.constant dense<0.000000e+00> : tensor<13x13x8x8xf32> // CHECK-NOT: scf.if // CHECK: %[[DEST:.+]] = tensor.empty() : tensor<13x13x8x8xf32> -// CHECK: %[[PACK:.+]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : f32) +// CHECK: %[[PACK:.+]] = linalg.pack %[[SRC]] padding_value(%[[PAD]] : f32) // CHECK: inner_dims_pos = [0, 1] 
inner_tiles = [8, 8] into %[[DEST]] : tensor<100x100xf32> -> tensor<13x13x8x8xf32> // CHECK: %[[LOOP:.+]]:2 = scf.while (%[[ARG2:.+]] = %[[C0]], %[[ARG3:.+]] = %[[INIT]]) // CHECK-SAME: (i32, tensor<13x13x8x8xf32>) -> (i32, tensor<13x13x8x8xf32>) { @@ -104,7 +104,7 @@ func.func @hoist_pack_op_with_zero_trip_check_in_outer_loop(%bound : i32, %src : } do { ^bb0(%arg1: i32, %arg2: tensor<13x13x8x8xf32>): %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %arg2, %pack : tensor<13x13x8x8xf32> %next = arith.addi %arg1, %cst1 : i32 scf.yield %next, %add : i32, tensor<13x13x8x8xf32> @@ -127,7 +127,7 @@ func.func @hoist_pack_op_with_zero_trip_check_in_outer_loop(%bound : i32, %src : // CHECK: scf.while // CHECK: scf.if // CHECK: tensor.empty -// CHECK: tensor.pack +// CHECK: linalg.pack // CHECK: scf.while // CHECK: } do { // CHECK: } @@ -145,10 +145,10 @@ func.func @not_hoist_loop_variant(%bound : i32, %src : tensor<100x100xf32>) -> t %bias = arith.constant dense<1.0> : tensor<13x13x8x8xf32> %res:2 = scf.while (%iter = %cst0, %val = %src) : (i32, tensor<100x100xf32>) -> (i32, tensor<100x100xf32>) { %pack_dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %val padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %pack_dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %val padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %pack_dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %pack, %bias : tensor<13x13x8x8xf32> %unpack_dest = tensor.empty() : tensor<100x100xf32> - %unpack = tensor.unpack %add inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %unpack_dest : tensor<13x13x8x8xf32> -> tensor<100x100xf32> + %unpack = linalg.unpack %add inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %unpack_dest : tensor<13x13x8x8xf32> -> tensor<100x100xf32> %next = arith.addi %iter, %cst1 : i32 %cond = arith.cmpi slt, %next, %bound : i32 scf.condition(%cond) %next, %unpack : i32, tensor<100x100xf32> @@ -162,11 +162,11 @@ func.func @not_hoist_loop_variant(%bound : i32, %src : tensor<100x100xf32>) -> t // CHECK-LABEL: func.func @not_hoist_loop_variant // CHECK-DAG: %[[PACK_DEST:.+]] = tensor.empty // CHECK-DAG: %[[UNPACK_DEST:.+]] = tensor.empty -// CHECK-NOT: tensor.pack -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.pack +// CHECK-NOT: linalg.unpack // CHECK: scf.while -// CHECK: tensor.pack {{.*}} into %[[PACK_DEST]] -// CHECK: tensor.unpack {{.*}} into %[[UNPACK_DEST]] +// CHECK: linalg.pack {{.*}} into %[[PACK_DEST]] +// CHECK: linalg.unpack {{.*}} into %[[UNPACK_DEST]] // CHECK: scf.condition // CHECK: } do { // CHECK: scf.yield @@ -182,7 +182,7 @@ func.func @not_hoist_from_nested_regions(%bound : i32, %flag : i1, %src : tensor %res:2 = scf.while (%iter = %cst0, %val = %init) : (i32, tensor<13x13x8x8xf32>) -> (i32, tensor<13x13x8x8xf32>) { %ifadd = scf.if %flag -> tensor<13x13x8x8xf32> { %dest = tensor.empty() : tensor<13x13x8x8xf32> - %pack = tensor.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> + %pack = linalg.pack %src padding_value(%pad0 : f32) inner_dims_pos = [0, 1] inner_tiles 
= [8, 8] into %dest : tensor<100x100xf32> -> tensor<13x13x8x8xf32> %add = arith.addf %val, %pack : tensor<13x13x8x8xf32> scf.yield %add : tensor<13x13x8x8xf32> } else { @@ -200,12 +200,12 @@ func.func @not_hoist_from_nested_regions(%bound : i32, %flag : i1, %src : tensor // CHECK-LABEL: func.func @not_hoist_from_nested_regions // CHECK-NOT: tensor.empty -// CHECK-NOT: tensor.pack -// CHECK-NOT: tensor.unpack +// CHECK-NOT: linalg.pack +// CHECK-NOT: linalg.unpack // CHECK: scf.while // CHECK: scf.if // CHECK: %[[PACK_DEST:.+]] = tensor.empty -// CHECK: tensor.pack {{.*}} into %[[PACK_DEST]] +// CHECK: linalg.pack {{.*}} into %[[PACK_DEST]] // CHECK: } else { // CHECK: scf.condition // CHECK: } do { diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir index 67e631518488..7f462b30f691 100644 --- a/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir +++ b/compiler/src/iree/compiler/GlobalOptimization/test/hoist_into_globals.mlir @@ -100,7 +100,7 @@ module @hoist_constant_pack_computation { %pad = arith.constant 5 : i4 %val1 = stablehlo.constant dense<3> : tensor<7x15xi4> %val2 = tensor.empty() : tensor<4x1x16x2xi4> - %ret = tensor.pack %val1 padding_value(%pad : i4) inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %val2 : tensor<7x15xi4> -> tensor<4x1x16x2xi4> + %ret = linalg.pack %val1 padding_value(%pad : i4) inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %val2 : tensor<7x15xi4> -> tensor<4x1x16x2xi4> util.return %ret : tensor<4x1x16x2xi4> } } diff --git a/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp b/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp index 2ac2642717ec..88f6861076f0 100644 --- a/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp +++ b/compiler/src/iree/compiler/Preprocessing/Common/ConvertConvToChannelsLast.cpp @@ -180,9 +180,9 @@ getUnitOuterDimPackReassociationMap(SmallVector targetIndices, } // Transpose the given tensor based on the given transpose indices using a -// tensor.pack. Additionally returns a new AffineMap for the packed value +// linalg.pack. Additionally returns a new AffineMap for the packed value // assuming otherwise the same iteration space. -static std::tuple, AffineMap> +static std::tuple, AffineMap> createTransposeAsTensorPack( PatternRewriter &rewriter, Location loc, Value input, AffineMap inputMap, SmallVector targetIndices, int tilingFactor, @@ -209,10 +209,10 @@ createTransposeAsTensorPack( } // Pack the input tensor. - auto empty = tensor::PackOp::createDestinationTensor( + auto empty = linalg::PackOp::createDestinationTensor( rewriter, loc, input, transposedTileSizes, targetIndices, SmallVector{}); - auto packedInput = rewriter.create( + auto packedInput = rewriter.create( loc, input, empty, targetIndices, transposedTileSizes, /*padding=*/std::nullopt, SmallVector{}); @@ -220,7 +220,7 @@ createTransposeAsTensorPack( AffineMap transposedMap; Value packedOperand = packedInput; - // Collapse the unit dims created by tensor.pack if the pack is just a + // Collapse the unit dims created by linalg.pack if the pack is just a // transpose. if (tilingFactor <= 0) { auto reassociationMap = @@ -256,7 +256,7 @@ createTransposeAsTensorPack( // unit dimensions necessary for the unpack. 
static Value createTransposeAsTensorUnPack(PatternRewriter &rewriter, Location loc, Value output, - tensor::PackOp packOp, + linalg::PackOp packOp, int tilingFactor) { Value packedOutput = output; if (tilingFactor <= 0) { @@ -289,11 +289,11 @@ static Value createTransposeAsTensorUnPack(PatternRewriter &rewriter, .getResult(); } - Value empty = tensor::UnPackOp::createDestinationTensor( + Value empty = linalg::UnPackOp::createDestinationTensor( rewriter, loc, packedOutput, packOp.getMixedTiles(), packOp.getInnerDimsPos(), packOp.getOuterDimsPerm()); - auto unpackedOutput = rewriter.create( + auto unpackedOutput = rewriter.create( loc, packedOutput, empty, packOp.getInnerDimsPos(), packOp.getMixedTiles(), packOp.getOuterDimsPerm()); return unpackedOutput.getResult(); @@ -509,7 +509,7 @@ getTilingReassociationMap(const int64_t rank, SetTy innerDims) { // dims. Produces a transpose on the tiled dimensions followed by an // expand_shape to introduce the outer unit dims. For example, // -// tensor.pack inner_dims_pos = [1] inner_tiles = [64] +// linalg.pack inner_dims_pos = [1] inner_tiles = [64] // : tensor<32x64x16xf32> to tensor<32x1x16x64xf32> // // Generalizes to: @@ -517,14 +517,14 @@ getTilingReassociationMap(const int64_t rank, SetTy innerDims) { // linalg.transpose ... tensor<32x64x16xf32> to tensor<32x16x64xf32> // tensor.expand_shape ... tensor<32x16x64xf32> to tensor<32x1x16x64xf32> class GeneralizeOuterUnitDimsPackOp final - : public OpRewritePattern { + : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; GeneralizeOuterUnitDimsPackOp(MLIRContext *context, PatternBenefit benefit = 2) - : OpRewritePattern(context, benefit) {} + : OpRewritePattern(context, benefit) {} - LogicalResult matchAndRewrite(tensor::PackOp packOp, + LogicalResult matchAndRewrite(linalg::PackOp packOp, PatternRewriter &rewriter) const override { if (!packOp.getOuterDimsPerm().empty()) return failure(); @@ -591,7 +591,7 @@ class GeneralizeOuterUnitDimsPackOp final // and thus no padding. Produces a collapse_shape to remove the unit dimensions // followed by a transpose. For example: // -// tensor.unpack inner_dims_pos = [1] inner_tiles = [64] +// linalg.unpack inner_dims_pos = [1] inner_tiles = [64] // : tensor<32x1x16x64xf32> to tensor<32x64x16xf32> // // Generalizes to: @@ -599,14 +599,14 @@ class GeneralizeOuterUnitDimsPackOp final // tensor.collapse_shape ... tensor<32x1x16x64xf32> to tensor<32x16x64xf32> // linalg.transpose ... tensor<32x16x64xf32> to tensor<32x64x16xf32> class GeneralizeOuterUnitDimsUnPackOp final - : public OpRewritePattern { + : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; GeneralizeOuterUnitDimsUnPackOp(MLIRContext *context, PatternBenefit benefit = 2) - : OpRewritePattern(context, benefit) {} + : OpRewritePattern(context, benefit) {} - LogicalResult matchAndRewrite(tensor::UnPackOp unpackOp, + LogicalResult matchAndRewrite(linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const override { if (!unpackOp.getOuterDimsPerm().empty()) return failure(); @@ -697,8 +697,8 @@ class ConvertConvToChannelsLastPass // Run pack/unpack canonicalization to try to cancel any packs. 
     {
       RewritePatternSet patterns(context);
-      tensor::PackOp::getCanonicalizationPatterns(patterns, context);
-      tensor::UnPackOp::getCanonicalizationPatterns(patterns, context);
+      linalg::PackOp::getCanonicalizationPatterns(patterns, context);
+      linalg::UnPackOp::getCanonicalizationPatterns(patterns, context);
       linalg::FillOp::getCanonicalizationPatterns(patterns, context);
       if (failed(applyPatternsGreedily(op, std::move(patterns)))) {
         return signalPassFailure();
diff --git a/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir b/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir
index 67c66e58dc8d..813952d505f4 100644
--- a/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir
+++ b/compiler/src/iree/compiler/Preprocessing/Common/test/conv_to_channels_last.mlir
@@ -44,9 +44,9 @@ util.func @conv_nchw_nhwc(%arg0: tensor<8x256x16x16xf32>, %arg1: tensor<16x256x3
 // TILE16: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2, d3, d8)>
 // TILE16-LABEL: util.func public @conv_nchw_nhwc
-// TILE16: %[[IMG:.+]] = tensor.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: %[[IMG:.+]] = linalg.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x256x16x16xf32> -> tensor<8x16x16x16x16xf32>
-// TILE16: %[[FILTER:.+]] = tensor.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
+// TILE16: %[[FILTER:.+]] = linalg.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
 // TILE16-SAME: tensor<16x256x3x3xf32> -> tensor<1x16x3x3x16x16xf32>
 // TILE16: %[[OUT:.+]] = linalg.transpose ins(%{{.*}} : tensor<8x16x14x14xf32>)
 // TILE16-SAME: outs(%{{.*}} : tensor<8x14x14x16xf32>) permutation = [0, 2, 3, 1]
@@ -104,17 +104,17 @@ module {
 // TILE16: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8) -> (d0, d1, d2, d3, d8)>
 // TILE16-LABEL: util.func public @generic_conv_nchw
-// TILE16: %[[IMG:.+]] = tensor.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: %[[IMG:.+]] = linalg.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x256x16x16xf32> -> tensor<8x16x16x16x16xf32>
-// TILE16: %[[FILTER:.+]] = tensor.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
+// TILE16: %[[FILTER:.+]] = linalg.pack {{.*}} inner_dims_pos = [1, 0] inner_tiles = [16, 16]
 // TILE16-SAME: tensor<64x256x3x3xf32> -> tensor<4x16x3x3x16x16xf32>
-// TILE16: %[[OUT:.+]] = tensor.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: %[[OUT:.+]] = linalg.pack {{.*}} inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x64x14x14xf32> -> tensor<8x4x14x14x16xf32>
 // TILE16: %[[TILED_CONV:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
 // TILE16-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "reduction", "parallel"]}
 // TILE16-SAME: ins(%[[IMG]], %[[FILTER]] : tensor<8x16x16x16x16xf32>, tensor<4x16x3x3x16x16xf32>)
 // TILE16-SAME: outs(%[[OUT]] : tensor<8x4x14x14x16xf32>) {
-// TILE16: tensor.unpack %[[TILED_CONV]] inner_dims_pos = [1] inner_tiles = [16]
+// TILE16: linalg.unpack %[[TILED_CONV]] inner_dims_pos = [1] inner_tiles = [16]
 // TILE16-SAME: tensor<8x4x14x14x16xf32> -> tensor<8x64x14x14xf32>
 // -----
@@ -143,7 +143,7 @@ util.func @mmt_no_transpose(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x128
 util.func @test_unit_dims_pack(%arg0: tensor<10x20x5xf32>) -> tensor<1x1x5x20x10xf32> {
   %dst = tensor.empty() :
tensor<1x1x5x20x10xf32> - %packed = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [20, 10] + %packed = linalg.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [20, 10] into %dst : tensor<10x20x5xf32> -> tensor<1x1x5x20x10xf32> util.return %packed : tensor<1x1x5x20x10xf32> diff --git a/docs/website/docs/community/blog/posts/microkernels.md b/docs/website/docs/community/blog/posts/microkernels.md index 7e14195b0555..49667d136513 100644 --- a/docs/website/docs/community/blog/posts/microkernels.md +++ b/docs/website/docs/community/blog/posts/microkernels.md @@ -382,17 +382,17 @@ module attributes {hal.device.targets = [#device_target_llvm_cpu]} { %8 = hal.tensor.import %arg2 "input2" : !hal.buffer_view -> tensor{%6, %7} %9 = affine.apply #map()[%0] %10 = tensor.empty(%9, %1) : tensor - %pack = tensor.pack %2 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %10 : tensor -> tensor + %pack = linalg.pack %2 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %10 : tensor -> tensor %11 = affine.apply #map()[%4] %12 = tensor.empty(%11, %3) : tensor - %pack_0 = tensor.pack %5 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %12 : tensor -> tensor + %pack_0 = linalg.pack %5 padding_value(%cst : f32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 1] into %12 : tensor -> tensor %13 = affine.apply #map()[%6] %14 = affine.apply #map()[%7] %15 = tensor.empty(%13, %14) : tensor - %pack_1 = tensor.pack %8 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 : tensor -> tensor + %pack_1 = linalg.pack %8 padding_value(%cst : f32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %15 : tensor -> tensor %16 = linalg.mmt4d ins(%pack, %pack_0 : tensor, tensor) outs(%pack_1 : tensor) -> tensor %17 = tensor.empty(%6, %7) : tensor - %unpack = tensor.unpack %16 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %17 : tensor -> tensor + %unpack = linalg.unpack %16 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %17 : tensor -> tensor %18 = hal.tensor.export %unpack "output0" : tensor{%6, %7} -> !hal.buffer_view return %18 : !hal.buffer_view } diff --git a/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir b/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir index 57b0782e22a0..b88b686b16ae 100644 --- a/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir +++ b/tests/e2e/regression/pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir @@ -3,7 +3,7 @@ func.func @pack_pad_transpose_1x9_into_2x4x8x4_issue_12546.mlir() { dense<[[1, 2, 3, 4, 5, 6, 7, 8, 9]]> : tensor<1x9xi8> %empty = tensor.empty() : tensor<2x4x8x4xi8> %c0_i8 = arith.constant 0 : i8 - %pack = tensor.pack %iree_input padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] + %pack = linalg.pack %iree_input padding_value(%c0_i8 : i8) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 4] into %empty : tensor<1x9xi8> -> tensor<2x4x8x4xi8> check.expect_eq_const(%pack, dense< diff --git a/tests/e2e/tensor_ops/pack.mlir b/tests/e2e/tensor_ops/pack.mlir index 98d5db6c2111..7ede2b15a88c 100644 --- a/tests/e2e/tensor_ops/pack.mlir +++ b/tests/e2e/tensor_ops/pack.mlir @@ -21,7 +21,7 @@ func.func private @generate_2D_source(%height : index, %width : index) -> tensor func.func 
@static_pack_simple() { %iree_input = util.unfoldable_constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32> %init = tensor.empty() : tensor<2x2x2x2xi32> - %pack = tensor.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init + %pack = linalg.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init : tensor<4x4xi32> -> tensor<2x2x2x2xi32> check.expect_eq_const(%pack, dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>) : tensor<2x2x2x2xi32> return @@ -41,7 +41,7 @@ func.func @dynamic_pack_simple() { %out_d0 = arith.ceildivui %in_d0, %c2 : index %out_d1 = arith.ceildivui %in_d1, %c2 : index %init = tensor.empty(%out_d0, %out_d1) : tensor - %pack = tensor.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init + %pack = linalg.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init : tensor -> tensor %cast = tensor.cast %pack : tensor to tensor<2x2x2x2xi32> check.expect_eq_const(%cast, dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>) : tensor<2x2x2x2xi32> @@ -52,7 +52,7 @@ func.func @static_pack_simple_pad_mode() { %iree_input = util.unfoldable_constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32> %pad = arith.constant 0 : i32 %init = tensor.empty() : tensor<2x2x3x3xi32> - %pack = tensor.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init + %pack = linalg.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init : tensor<4x4xi32> -> tensor<2x2x3x3xi32> // After padding, the input is // 0, 1, 2, 3, 0, 0 @@ -83,7 +83,7 @@ func.func @dynamic_pack_simple_pad_mode() { %out_d0 = arith.ceildivui %in_d0, %c3 : index %out_d1 = arith.ceildivui %in_d1, %c3 : index %init = tensor.empty(%out_d0, %out_d1) : tensor - %pack = tensor.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init + %pack = linalg.pack %iree_input padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init : tensor -> tensor %cast = tensor.cast %pack : tensor to tensor<2x2x3x3xi32> check.expect_eq_const(%cast, dense<[[[[0, 1, 2], [4, 5, 6], [8, 9, 10]], @@ -100,7 +100,7 @@ func.func @static_pack_large() { %source = tensor.cast %0 : tensor to tensor<128x256xi32> %init_pack = tensor.empty() : tensor<4x16x32x16xi32> - %pack = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack + %pack = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack : tensor<128x256xi32> -> tensor<4x16x32x16xi32> // Pack without padding is just a reshape followed by a transpose. 
@@ -121,7 +121,7 @@ func.func @static_pack_transpose_inner_dims_large() { %source = tensor.cast %0 : tensor to tensor<128x256xi32> %init_pack = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack + %pack = linalg.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<128x256xi32> -> tensor<4x16x16x32xi32> %reshape = tensor.expand_shape %source [[0, 1], [2, 3]] output_shape [4, 32, 16, 16] : tensor<128x256xi32> into tensor<4x32x16x16xi32> %init_transpose = tensor.empty() : tensor<4x16x16x32xi32> @@ -142,7 +142,7 @@ func.func @static_pack_pad_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<4x16x32x16xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack : tensor<100x250xi32> -> tensor<4x16x32x16xi32> @@ -169,7 +169,7 @@ func.func @static_pack_pad_transpose_outer_dims_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<16x4x32x16xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_pack : tensor<100x250xi32> -> tensor<16x4x32x16xi32> @@ -196,7 +196,7 @@ func.func @static_pack_pad_transpose_inner_dims_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<4x16x16x32xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<100x250xi32> -> tensor<4x16x16x32xi32> @@ -223,7 +223,7 @@ func.func @static_pack_pad_transpose_inner_and_outer_dims_large() { %padding_value = arith.constant 42 : i32 %init_pack = tensor.empty() : tensor<16x4x16x32xi32> - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor<100x250xi32> -> tensor<16x4x16x32xi32> diff --git a/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir b/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir index b3a29ebdf545..6b3156d25366 100644 --- a/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir +++ b/tests/e2e/tensor_ops/pack_dynamic_inner_tiles.mlir @@ -32,7 +32,7 @@ func.func @fully_dynamic_pack_simple() { %out_d0 = arith.ceildivui %in_d0, %c2 : index %out_d1 = arith.ceildivui %in_d1, %c2 : index %init = tensor.empty(%out_d0, %out_d1, %c2, %c2) : tensor - %pack = tensor.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [%c2, %c2] into %init + %pack = linalg.pack %iree_input inner_dims_pos = [0, 1] inner_tiles = [%c2, %c2] into %init : tensor -> tensor %cast = tensor.cast %pack : tensor to tensor<2x2x2x2xi32> check.expect_eq_const(%cast, dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>) : tensor<2x2x2x2xi32> @@ -50,7 +50,7 @@ func.func @fully_dynamic_pack_pad_transpose_inner_and_outer_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %init_pack = tensor.empty(%tiled_d1, %tiled_d0, %c16, %c32) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = 
linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [%c16, %c32] into %init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<16x4x16x32xi32> @@ -85,7 +85,7 @@ func.func @dynamic_pack_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack + %pack = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x32x16xi32> @@ -113,7 +113,7 @@ func.func @dynamic_pack_transpose_inner_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dyn_init_pack + %pack = linalg.pack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x16x32xi32> @@ -143,7 +143,7 @@ func.func @dynamic_pack_pad_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x32x16xi32> @@ -178,7 +178,7 @@ func.func @dynamic_pack_pad_transpose_outer_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %dyn_init_pack = tensor.empty(%tiled_d1, %tiled_d0) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dyn_init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<16x4x32x16xi32> @@ -213,7 +213,7 @@ func.func @dynamic_pack_pad_transpose_inner_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %init_pack = tensor.empty(%tiled_d0, %tiled_d1) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<4x16x16x32xi32> @@ -249,7 +249,7 @@ func.func @dynamic_pack_pad_transpose_inner_and_outer_dims_large() { %tiled_d0 = arith.ceildivui %d0, %c32 : index %tiled_d1 = arith.ceildivui %d1, %c16 : index %init_pack = tensor.empty(%tiled_d1, %tiled_d0) : tensor - %pack = tensor.pack %source padding_value(%padding_value : i32) + %pack = linalg.pack %source padding_value(%padding_value : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_pack : tensor -> tensor %cast_pack = tensor.cast %pack : tensor to tensor<16x4x16x32xi32> diff --git a/tests/e2e/tensor_ops/pack_i8.mlir b/tests/e2e/tensor_ops/pack_i8.mlir index cd8016906a43..07312c0ca711 100644 --- a/tests/e2e/tensor_ops/pack_i8.mlir +++ b/tests/e2e/tensor_ops/pack_i8.mlir @@ -30,7 +30,7 @@ func.func 
@static_pack_vnni_lhs_large() {
   %source = tensor.cast %0 : tensor<?x?xi8> to tensor<128x256xi8>
   %init_pack = tensor.empty() : tensor<8x128x16x2xi8>
-  %pack = tensor.pack %source
+  %pack = linalg.pack %source
       outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2]
       into %init_pack : tensor<128x256xi8> -> tensor<8x128x16x2xi8>
@@ -52,7 +52,7 @@ func.func @static_pack_vnni_rhs_large() {
   %source = tensor.cast %0 : tensor<?x?xi8> to tensor<256x512xi8>
   %init_pack = tensor.empty() : tensor<32x128x16x2xi8>
-  %pack = tensor.pack %source
+  %pack = linalg.pack %source
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2]
       into %init_pack : tensor<256x512xi8> -> tensor<32x128x16x2xi8>
@@ -75,7 +75,7 @@ func.func @static_pack_vnni_lhs_large_with_pad() {
   %c0_i8 = arith.constant 0 : i8
   %init_pack = tensor.empty() : tensor<8x128x16x2xi8>
-  %pack = tensor.pack %source padding_value(%c0_i8 : i8)
+  %pack = linalg.pack %source padding_value(%c0_i8 : i8)
       outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 2]
       into %init_pack : tensor<127x255xi8> -> tensor<8x128x16x2xi8>
@@ -101,7 +101,7 @@ func.func @static_pack_vnni_rhs_large_with_pad() {
   %c0_i8 = arith.constant 0 : i8
   %init_pack = tensor.empty() : tensor<32x128x16x2xi8>
-  %pack = tensor.pack %source padding_value(%c0_i8 : i8)
+  %pack = linalg.pack %source padding_value(%c0_i8 : i8)
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 2]
       into %init_pack : tensor<255x511xi8> -> tensor<32x128x16x2xi8>
diff --git a/tests/e2e/tensor_ops/unpack.mlir b/tests/e2e/tensor_ops/unpack.mlir
index e5b0df40a81d..8a8af619c693 100644
--- a/tests/e2e/tensor_ops/unpack.mlir
+++ b/tests/e2e/tensor_ops/unpack.mlir
@@ -24,7 +24,7 @@ func.func private @generate_4D_source(%d0: index, %d1: index, %d2: index, %d3: i
 func.func @static_unpack_simple() {
   %iree_input = util.unfoldable_constant dense<[[[[0, 1], [4, 5]], [[2, 3], [6, 7]]], [[[8, 9], [12, 13]], [[10 ,11], [14, 15]]]]> : tensor<2x2x2x2xi32>
   %init = tensor.empty() : tensor<4x4xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
       : tensor<2x2x2x2xi32> -> tensor<4x4xi32>
   check.expect_eq_const(%unpack, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
   return
@@ -40,7 +40,7 @@ func.func @dynamic_unpack_simple() {
   %out_d0 = arith.muli %in_d0, %c2 : index
   %out_d1 = arith.muli %in_d1, %c2 : index
   %init = tensor.empty(%out_d0, %out_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %init
       : tensor<?x?x2x2xi32> -> tensor<?x?xi32>
   %cast = tensor.cast %unpack : tensor<?x?xi32> to tensor<4x4xi32>
   check.expect_eq_const(%cast, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
@@ -53,7 +53,7 @@ func.func @static_unpack_simple_extract_slice() {
                                                  [[[12, 13, 14], [0, 0, 0], [0, 0, 0]], [[15, 0, 0], [0, 0, 0], [0, 0, 0]]]]> : tensor<2x2x3x3xi32>
   %init = tensor.empty() : tensor<4x4xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
       : tensor<2x2x3x3xi32> -> tensor<4x4xi32>
   check.expect_eq_const(%unpack, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
   return
@@ -75,7 +75,7 @@ func.func @dynamic_unpack_simple_extract_slice() {
   %out_d0 = arith.subi %full_out_d0, %c2 : index
   %out_d1 = arith.subi %full_out_d1, %c2 : index
   %init = tensor.empty(%out_d0, %out_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
+  %unpack = linalg.unpack %iree_input inner_dims_pos = [0, 1] inner_tiles = [3, 3] into %init
       : tensor<?x?x3x3xi32> -> tensor<?x?xi32>
   %cast = tensor.cast %unpack : tensor<?x?xi32> to tensor<4x4xi32>
   check.expect_eq_const(%cast, dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : tensor<4x4xi32>) : tensor<4x4xi32>
@@ -91,7 +91,7 @@ func.func @static_unpack_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<4x16x32x16xi32>
   %init_unpack = tensor.empty() : tensor<128x256xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<4x16x32x16xi32> -> tensor<128x256xi32>
   %init_transpose = tensor.empty() : tensor<4x32x16x16xi32>
@@ -116,7 +116,7 @@ func.func @dynamic_unpack_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -144,7 +144,7 @@ func.func @dynamic_unpack_transpose_inner_dims_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -173,7 +173,7 @@ func.func @dynamic_unpack_transpose_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -202,7 +202,7 @@ func.func @dynamic_unpack_transpose_inner_and_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 128 : index
   %packed_d1 = util.unfoldable_constant 256 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<128x256xi32>
@@ -229,7 +229,7 @@ func.func @static_unpack_extract_slice_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<4x16x32x16xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<4x16x32x16xi32> -> tensor<100x250xi32>
   %init_transpose = tensor.empty() : tensor<4x32x16x16xi32>
@@ -256,7 +256,7 @@ func.func @dynamic_unpack_extract_slice_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>
@@ -284,7 +284,7 @@ func.func @static_unpack_extract_slice_transpose_inner_dims_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<4x16x16x32xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<4x16x16x32xi32> -> tensor<100x250xi32>
@@ -310,7 +310,7 @@ func.func @static_unpack_extract_slice_transpose_outer_dims_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<16x4x32x16xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<16x4x32x16xi32> -> tensor<100x250xi32>
   %init_transpose = tensor.empty() : tensor<4x32x16x16xi32>
@@ -335,7 +335,7 @@ func.func @static_unpack_extract_slice_transpose_inner_and_outer_dims_large() {
   %source = tensor.cast %0 : tensor<?x?x?x?xi32> to tensor<16x4x16x32xi32>
   %init_unpack = tensor.empty() : tensor<100x250xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32]
       into %init_unpack : tensor<16x4x16x32xi32> -> tensor<100x250xi32>
@@ -364,7 +364,7 @@ func.func @dynamic_unpack_extract_slice_transpose_inner_dims_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %init_unpack
       : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>
@@ -395,7 +395,7 @@ func.func @dynamic_unpack_extract_slice_transpose_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
+  %unpack = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %init_unpack
       : tensor<?x?x32x16xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>
@@ -425,7 +425,7 @@ func.func @dynamic_unpack_extract_slice_transpose_inner_and_outer_dims_large() {
   %packed_d0 = util.unfoldable_constant 100 : index
   %packed_d1 = util.unfoldable_constant 250 : index
   %init_unpack = tensor.empty(%packed_d0, %packed_d1) : tensor<?x?xi32>
-  %unpack = tensor.unpack %source
+  %unpack = linalg.unpack %source
       outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 32]
       into %init_unpack : tensor<?x?x16x32xi32> -> tensor<?x?xi32>
   %cast_unpack = tensor.cast %unpack : tensor<?x?xi32> to tensor<100x250xi32>