Bump IREE to 055ce1f (#1124)
The main change is to update `tensor.pack`/`tensor.unpack` to `linalg.pack`/`linalg.unpack`, following the upstream change llvm/llvm-project#123902.
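The rename is mechanical: the ops keep their syntax and semantics and only move from the tensor dialect to the linalg dialect. A minimal illustrative sketch (`%src` and `%dest` are placeholder names, not taken from this diff):

    // Before: the pack op lives in the tensor dialect.
    %packed = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [64, 64]
        into %dest : tensor<1024x2048xi32> -> tensor<16x32x64x64xi32>

    // After: same op, same syntax, now in the linalg dialect.
    %packed = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [64, 64]
        into %dest : tensor<1024x2048xi32> -> tensor<16x32x64x64xi32>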
yzhang93 authored Feb 21, 2025 · 1 parent 5cfa07a · commit b005ec2
Showing 15 changed files with 186 additions and 186 deletions.
@@ -76,7 +76,7 @@ static FailureOr<SmallVector<Value>> getPackOrCopyOperands(
   uint32_t currentLevel{0};
   Operation *currentOp = input.value().getDefiningOp();
   while (currentLevel < depthLevel && currentOp != nullptr) {
-    if (dyn_cast<tensor::PackOp>(currentOp)) {
+    if (dyn_cast<linalg::PackOp>(currentOp)) {
       currentLevel++;
       if (currentLevel == depthLevel) break;
     } else if (dyn_cast<linalg::CopyOp>(currentOp)) {

@@ -21,7 +21,7 @@ namespace {

 /// A utility function specific to this pass which, given a value `operand`,
 /// traverses the def-chain till it finds a tensor.extract_slice. Currently,
-/// the two producer ops that are allowed in the def-chain are tensor.pack and
+/// the two producer ops that are allowed in the def-chain are linalg.pack and
 /// linalg.copy ops. The 2 cases where it successfully finds and returns an
 /// extract_slice (SLICE) are:
 ///
@@ -39,7 +39,7 @@ namespace {
 static FailureOr<tensor::ExtractSliceOp> getTensorExtractSliceDefiningOp(
     Value operand) {
   // Roll back through all the pack or copy ops immediately preceding `operand`.
-  while (isa_and_present<tensor::PackOp, linalg::CopyOp>(
+  while (isa_and_present<linalg::PackOp, linalg::CopyOp>(
       operand.getDefiningOp())) {
     operand = operand.getDefiningOp()->getOperand(0);
   }
@@ -49,7 +49,7 @@ static FailureOr<tensor::ExtractSliceOp> getTensorExtractSliceDefiningOp(
   if (!sliceOp) return failure();

   // Case 1 outlined above.
-  if (isa_and_present<tensor::PackOp, linalg::CopyOp>(
+  if (isa_and_present<linalg::PackOp, linalg::CopyOp>(
          sliceOp.getSource().getDefiningOp())) {
     return sliceOp;
   }
@@ -60,7 +60,7 @@ static FailureOr<tensor::ExtractSliceOp> getTensorExtractSliceDefiningOp(
   LoopLikeOpInterface loop = dyn_cast<LoopLikeOpInterface>(parent);
   if (!loop) return failure();
   Operation *operandParent = loop.getTiedLoopInit(blkArg)->getOwner();
-  if (isa_and_present<tensor::PackOp, linalg::CopyOp>(operandParent))
+  if (isa_and_present<linalg::PackOp, linalg::CopyOp>(operandParent))
     return sliceOp;
 }

@@ -110,7 +110,7 @@ void AMDAIEFuseProducerIntoLoopPass::runOnOperation() {
   LoopLikeOpInterface loops = cast<LoopLikeOpInterface>(scfLoopOp);

   // Based on the `fuseDepth`, we would greedily fuse the producers of a linalg
-  // computation op. Currently, we are limiting the producers to tensor.pack or
+  // computation op. Currently, we are limiting the producers to linalg.pack or
   // linalg.copy ops.
   for (unsigned depth = 1; depth <= fuseDepth; depth++) {
     // Search the last compute op in the loop and its producer slices.
@@ -153,7 +153,7 @@ void AMDAIEFuseProducerIntoLoopPass::runOnOperation() {

     // Case where operand of a generic op is a pack/copy op which is in a
     // different block than the generic's block.
-    else if (isa_and_present<tensor::PackOp, linalg::CopyOp>(
+    else if (isa_and_present<linalg::PackOp, linalg::CopyOp>(
                 operand.getDefiningOp())) {
       Operation *parent = operand.getDefiningOp();
       Block *genericBlock = genericOp->getBlock();

@@ -106,9 +106,9 @@ void AMDAIEPackAndTransposePass::runOnOperation() {
   }

   // Step 3. Pack Transpose
-  SmallVector<tensor::PackOp> packOps = packResult->packOps;
+  SmallVector<linalg::PackOp> packOps = packResult->packOps;
   linalg::LinalgOp packedOp = packResult->packedLinalgOp;
-  SmallVector<tensor::UnPackOp> unpackOps = packResult->unPackOps;
+  SmallVector<linalg::UnPackOp> unpackOps = packResult->unPackOps;

   if (packOps.size() != 3 || !packedOp || unpackOps.empty()) {
     funcOp->emitOpError("failed to get correct pack and unpack ops");
@@ -122,7 +122,7 @@ void AMDAIEPackAndTransposePass::runOnOperation() {

   for (auto [index, unpackEmpty, innerPerm, outerPerm] :
        llvm::zip(packIndices, unpackArr, innerPermArr, outerPermArr)) {
-    tensor::UnPackOp unpackOp;
+    linalg::UnPackOp unpackOp;
     if (unpackEmpty) {
       unpackOp = unpackOps.back();
     }

@@ -178,8 +178,8 @@ static bool isTilingReductionDimension(TilingInterface consumerOp,
 }

 static bool consumerToSkip(TilingInterface op) {
-  if (isa<linalg::CopyOp>(op) || isa<tensor::PackOp>(op) ||
-      isa<tensor::UnPackOp>(op))
+  if (isa<linalg::CopyOp>(op) || isa<linalg::PackOp>(op) ||
+      isa<linalg::UnPackOp>(op))
     return true;
   return false;
 }
@@ -279,7 +279,7 @@ void AMDAIETileAndFusePass::runOnOperation() {
   TilingInterface consumerOp;
   funcOp->walk<WalkOrder::PostOrder, ReverseIterator>([&](TilingInterface op) {
     // Find the next consumer op if it does not have loops OR it is from
-    // the skip ops list which currently contains linalg.copy and tensor.unpack.
+    // the skip ops list which currently contains linalg.copy and linalg.unpack.
     if (op.getLoopIteratorTypes().empty() || consumerToSkip(op))
       return WalkResult::advance();

@@ -356,7 +356,7 @@ void AMDAIETileAndFusePass::runOnOperation() {
   bool fusableOp =
       TypeSwitch<Operation *, bool>(originalProducer.getOwner())
           // List ops that shouldnt be fused.
-          .Case<tensor::PackOp, tensor::PadOp, linalg::CopyOp,
+          .Case<linalg::PackOp, tensor::PadOp, linalg::CopyOp,
                 memref::CopyOp>([](Operation *) { return false; })
           // Fuse all Linalg ops (can be generalized later)
           .Default([&](Operation *op) {

@@ -794,7 +794,7 @@ void addMLIRAIELoweringPasses(OpPassManager &pm) {
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createConvertLinalgToLoopsPass());
   pm.addPass(createLowerAffinePass());
-  pm.addPass(createConvertSCFToCFPass());
+  pm.addPass(createSCFToControlFlowPass());

   {
     OpPassManager &devicePM = pm.nest<xilinx::AIE::DeviceOp>();

@@ -354,7 +354,7 @@ def AMDAIEFuseProducerIntoLoop :
   let description = [{
     Greedily fuse the producers of a linalg computation op based on the `fuseDepth`.
     Currently, the two producer ops that are allowed in the defining op chain are
-    tensor.pack and linalg.copy ops.
+    linalg.pack and linalg.copy ops.
   }];
   let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEFuseProducerIntoLoopPass()";
   let options = [
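For orientation, a minimal before/after sketch of the fusion this pass description refers to (hypothetical shapes and names, not taken from this commit): with `fuseDepth = 1`, a `linalg.pack` producer whose result is sliced inside an `scf.forall` is pulled into the loop body, so each iteration packs only the tile it consumes.

    // Before fusion: the whole input is packed once, outside the loop.
    %packed = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [64, 64]
        into %init : tensor<1024x512xi32> -> tensor<16x8x64x64xi32>
    %r = scf.forall (%i, %j) in (16, 8)
        shared_outs(%out = %acc) -> (tensor<16x8x64x64xi32>) {
      %t = tensor.extract_slice %packed[%i, %j, 0, 0] [1, 1, 64, 64] [1, 1, 1, 1]
          : tensor<16x8x64x64xi32> to tensor<1x1x64x64xi32>
      // ... compute consuming %t, then parallel_insert_slice into %out ...
    }

    // After fusion: each iteration extracts its own source tile and packs it.
    %r = scf.forall (%i, %j) in (16, 8)
        shared_outs(%out = %acc) -> (tensor<16x8x64x64xi32>) {
      %oi = affine.apply affine_map<(d0) -> (d0 * 64)>(%i)
      %oj = affine.apply affine_map<(d0) -> (d0 * 64)>(%j)
      %s = tensor.extract_slice %src[%oi, %oj] [64, 64] [1, 1]
          : tensor<1024x512xi32> to tensor<64x64xi32>
      %t = linalg.pack %s inner_dims_pos = [0, 1] inner_tiles = [64, 64]
          into %tile : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
      // ... same compute consuming %t ...
    }

The def-chain walk in the C++ hunks above (pack/copy ops traced back to a tensor.extract_slice) is what establishes that this rewrite is valid before the producer is moved.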

@@ -14,14 +14,14 @@ func.func @matmul_static(%arg0 : tensor<1024x2048xi32>, %arg1 : tensor<2048x512x
   %c0 = arith.constant 0 : index
   %5 = tensor.empty() : tensor<1024x512xi32>
   %6 = tensor.empty() : tensor<16x32x64x64xi32>
-  %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %6 : tensor<1024x2048xi32> -> tensor<16x32x64x64xi32>
+  %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %6 : tensor<1024x2048xi32> -> tensor<16x32x64x64xi32>
   %7 = tensor.empty() : tensor<32x8x64x64xi32>
-  %pack_0 = tensor.pack %arg1 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %7 : tensor<2048x512xi32> -> tensor<32x8x64x64xi32>
+  %pack_0 = linalg.pack %arg1 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %7 : tensor<2048x512xi32> -> tensor<32x8x64x64xi32>
   %8 = tensor.empty() : tensor<16x8x64x64xi32>
   %9 = tensor.empty() : tensor<16x32x16x8x4x8xi32>
-  %pack_1 = tensor.pack %pack inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %9 : tensor<16x32x64x64xi32> -> tensor<16x32x16x8x4x8xi32>
+  %pack_1 = linalg.pack %pack inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %9 : tensor<16x32x64x64xi32> -> tensor<16x32x16x8x4x8xi32>
   %10 = tensor.empty() : tensor<32x8x8x8x8x8xi32>
-  %pack_2 = tensor.pack %pack_0 inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %10 : tensor<32x8x64x64xi32> -> tensor<32x8x8x8x8x8xi32>
+  %pack_2 = linalg.pack %pack_0 inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %10 : tensor<32x8x64x64xi32> -> tensor<32x8x8x8x8x8xi32>
   %11 = tensor.empty() : tensor<16x8x16x8x4x8xi32>
   %12 = linalg.fill ins(%c0_i32 : i32) outs(%11 : tensor<16x8x16x8x4x8xi32>) -> tensor<16x8x16x8x4x8xi32>
   %13 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%pack_1, %pack_2 : tensor<16x32x16x8x4x8xi32>, tensor<32x8x8x8x8x8xi32>) outs(%12 : tensor<16x8x16x8x4x8xi32>) {
@@ -30,63 +30,63 @@ func.func @matmul_static(%arg0 : tensor<1024x2048xi32>, %arg1 : tensor<2048x512x
     %15 = arith.addi %out, %14 : i32
     linalg.yield %15 : i32
   } -> tensor<16x8x16x8x4x8xi32>
-  %unpack = tensor.unpack %13 inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %8 : tensor<16x8x16x8x4x8xi32> -> tensor<16x8x64x64xi32>
-  %unpack_3 = tensor.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %5 : tensor<16x8x64x64xi32> -> tensor<1024x512xi32>
+  %unpack = linalg.unpack %13 inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %8 : tensor<16x8x16x8x4x8xi32> -> tensor<16x8x64x64xi32>
+  %unpack_3 = linalg.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %5 : tensor<16x8x64x64xi32> -> tensor<1024x512xi32>
   return %unpack_3 : tensor<1024x512xi32>
 }

 // LINALG-INPUT-OUTPUT-NOT: memref.alloc
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT-NOT: memref.alloc
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT: memref.alloc() : memref<16x32x16x8x4x8xi32, 2 : i32>
 // LINALG-INPUT-OUTPUT: bufferization.to_tensor
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT: memref.alloc() : memref<32x8x8x8x8x8xi32, 2 : i32>
 // LINALG-INPUT-OUTPUT: bufferization.to_tensor
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT: memref.alloc() : memref<16x8x16x8x4x8xi32, 2 : i32>
 // LINALG-INPUT-OUTPUT: bufferization.to_tensor
 // LINALG-INPUT-OUTPUT: linalg.fill
 // LINALG-INPUT-OUTPUT: linalg.generic

 // LINALG-INPUT-NOT: memref.alloc
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT-NOT: memref.alloc
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT: memref.alloc() : memref<16x32x16x8x4x8xi32, 2 : i32>
 // LINALG-INPUT: bufferization.to_tensor
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT: memref.alloc() : memref<32x8x8x8x8x8xi32, 2 : i32>
 // LINALG-INPUT: bufferization.to_tensor
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT-NOT: memref.alloc
 // LINALG-INPUT: linalg.fill
 // LINALG-INPUT: linalg.generic

 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT: memref.alloc() : memref<16x8x16x8x4x8xi32, 2 : i32>
 // LINALG-OUTPUT: bufferization.to_tensor
 // LINALG-OUTPUT: linalg.fill
 // LINALG-OUTPUT: linalg.generic

 // PACK-INPUT: memref.alloc() : memref<16x32x64x64xi32, 1 : i32>
 // PACK-INPUT: bufferization.to_tensor
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT: memref.alloc() : memref<32x8x64x64xi32, 1 : i32>
 // PACK-INPUT: bufferization.to_tensor
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT-NOT: memref.alloc
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT-NOT: memref.alloc
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT-NOT: memref.alloc
 // PACK-INPUT: linalg.fill
 // PACK-INPUT: linalg.generic
@@ -105,14 +105,14 @@ func.func @matmul_elementwise(%arg0: tensor<1024x512xi8>, %arg1: tensor<512x1024
     %extracted_slice_0 = tensor.extract_slice %arg1[0, %arg4] [512, 64] [1, 1] : tensor<512x1024xi8> to tensor<512x64xi8>
     %extracted_slice_1 = tensor.extract_slice %0[%arg3, %arg4] [64, 64] [1, 1] : tensor<1024x1024xi32> to tensor<64x64xi32>
     %2 = tensor.empty() : tensor<1x16x64x32xi8>
-    %pack = tensor.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %2 : tensor<64x512xi8> -> tensor<1x16x64x32xi8>
+    %pack = linalg.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %2 : tensor<64x512xi8> -> tensor<1x16x64x32xi8>
     %3 = tensor.empty() : tensor<16x1x32x64xi8>
-    %pack_2 = tensor.pack %extracted_slice_0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %3 : tensor<512x64xi8> -> tensor<16x1x32x64xi8>
+    %pack_2 = linalg.pack %extracted_slice_0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %3 : tensor<512x64xi8> -> tensor<16x1x32x64xi8>
     %4 = tensor.empty() : tensor<1x1x64x64xi32>
     %5 = tensor.empty() : tensor<1x16x4x16x4x8xi8>
-    %pack_3 = tensor.pack %pack outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %5 : tensor<1x16x64x32xi8> -> tensor<1x16x4x16x4x8xi8>
+    %pack_3 = linalg.pack %pack outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %5 : tensor<1x16x64x32xi8> -> tensor<1x16x4x16x4x8xi8>
     %6 = tensor.empty() : tensor<16x1x8x4x8x8xi8>
-    %pack_4 = tensor.pack %pack_2 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [8, 8] into %6 : tensor<16x1x32x64xi8> -> tensor<16x1x8x4x8x8xi8>
+    %pack_4 = linalg.pack %pack_2 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [8, 8] into %6 : tensor<16x1x32x64xi8> -> tensor<16x1x8x4x8x8xi8>
     %7 = tensor.empty() : tensor<1x1x8x16x4x8xi32>
     %8 = linalg.fill ins(%c0_i32 : i32) outs(%7 : tensor<1x1x8x16x4x8xi32>) -> tensor<1x1x8x16x4x8xi32>
     %9 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%pack_3, %pack_4 : tensor<1x16x4x16x4x8xi8>, tensor<16x1x8x4x8x8xi8>) outs(%8 : tensor<1x1x8x16x4x8xi32>) {
@@ -125,49 +125,49 @@ func.func @matmul_elementwise(%arg0: tensor<1024x512xi8>, %arg1: tensor<512x1024
     } -> tensor<1x1x8x16x4x8xi32>
     %extracted_slice_5 = tensor.extract_slice %arg2[%arg3, %arg4] [64, 64] [1, 1] : tensor<1024x1024xi32> to tensor<64x64xi32>
     %extracted_slice_6 = tensor.extract_slice %arg5[%arg3, %arg4] [64, 64] [1, 1] : tensor<1024x1024xi32> to tensor<64x64xi32>
-    %pack_7 = tensor.pack %extracted_slice_6 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
-    %pack_8 = tensor.pack %extracted_slice_5 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
-    %pack_9 = tensor.pack %pack_7 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
-    %pack_10 = tensor.pack %pack_8 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
+    %pack_7 = linalg.pack %extracted_slice_6 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
+    %pack_8 = linalg.pack %extracted_slice_5 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
+    %pack_9 = linalg.pack %pack_7 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
+    %pack_10 = linalg.pack %pack_8 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
     %10 = linalg.generic {indexing_maps = [#map3, #map3, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%9, %pack_10 : tensor<1x1x8x16x4x8xi32>, tensor<1x1x8x16x4x8xi32>) outs(%pack_9 : tensor<1x1x8x16x4x8xi32>) {
     ^bb0(%in: i32, %in_12: i32, %out: i32):
       %11 = arith.addi %in, %in_12 : i32
       linalg.yield %11 : i32
     } -> tensor<1x1x8x16x4x8xi32>
-    %unpack = tensor.unpack %10 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %4 : tensor<1x1x8x16x4x8xi32> -> tensor<1x1x64x64xi32>
-    %unpack_11 = tensor.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %extracted_slice_1 : tensor<1x1x64x64xi32> -> tensor<64x64xi32>
+    %unpack = linalg.unpack %10 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %4 : tensor<1x1x8x16x4x8xi32> -> tensor<1x1x64x64xi32>
+    %unpack_11 = linalg.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %extracted_slice_1 : tensor<1x1x64x64xi32> -> tensor<64x64xi32>
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %unpack_11 into %arg5[%arg3, %arg4] [64, 64] [1, 1] : tensor<64x64xi32> into tensor<1024x1024xi32>
     }
   } {mapping = [#gpu.block<y>, #gpu.block<x>]}
   return %1 : tensor<1024x1024xi32>
 }

-// ELEMENTWISE-INPUT-COUNT-4: tensor.pack
+// ELEMENTWISE-INPUT-COUNT-4: linalg.pack
 // ELEMENTWISE-INPUT: linalg.fill
 // ELEMENTWISE-INPUT: linalg.generic
 // ELEMENTWISE-INPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT: memref.alloc() : memref<1x1x8x16x4x8xi32, 2 : i32>
 // ELEMENTWISE-INPUT: bufferization.to_tensor
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT: linalg.generic

-// ELEMENTWISE-INPUT-OUTPUT-COUNT-4: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT-COUNT-4: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: linalg.fill
 // ELEMENTWISE-INPUT-OUTPUT: linalg.generic
 // ELEMENTWISE-INPUT-OUTPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: memref.alloc() : memref<1x1x8x16x4x8xi32, 2 : i32>
 // ELEMENTWISE-INPUT-OUTPUT: bufferization.to_tensor
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: memref.alloc() : memref<1x1x8x16x4x8xi32, 2 : i32>
 // ELEMENTWISE-INPUT-OUTPUT: bufferization.to_tensor
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: linalg.generic