Merge branch 'main' into skipROCmTest
petrex authored Jan 21, 2025
2 parents 460884d + ea7910e · commit 5104916
Showing 22 changed files with 284 additions and 300 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/float8_test.yml
@@ -29,6 +29,9 @@ jobs:
gpu-arch-type: "cuda"
gpu-arch-version: "12.1"

permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
timeout: 60
6 changes: 4 additions & 2 deletions .github/workflows/nightly_smoke_test.yml
@@ -11,7 +11,7 @@ concurrency:
cancel-in-progress: true

env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
test:
@@ -25,7 +25,9 @@ jobs:
gpu-arch-type: "cuda"
gpu-arch-version: "12.1"


permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: ${{ matrix.runs-on }}
16 changes: 6 additions & 10 deletions .github/workflows/regression_test.yml
@@ -17,10 +17,6 @@ concurrency:
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}

permissions:
id-token: write
contents: read

jobs:
test-nightly:
strategy:
@@ -37,16 +33,13 @@ jobs:
torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu'
gpu-arch-type: "cpu"
gpu-arch-version: ""
- name: ROCM Nightly
runs-on: linux.rocm.gpu.2
torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/rocm6.3'
gpu-arch-type: "rocm"
gpu-arch-version: "6.3"

permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
timeout: 120
no-sudo: ${{ matrix.gpu-arch-type == 'rocm' }}
runner: ${{ matrix.runs-on }}
gpu-arch-type: ${{ matrix.gpu-arch-type }}
gpu-arch-version: ${{ matrix.gpu-arch-version }}
@@ -81,6 +74,7 @@ jobs:
torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121'
gpu-arch-type: "cuda"
gpu-arch-version: "12.1"

- name: CPU 2.3
runs-on: linux.4xlarge
torch-spec: 'torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu'
@@ -108,6 +102,8 @@ jobs:
conda create -n venv python=3.9 -y
conda activate venv
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
python -m pip install --upgrade pip
pip install ${{ matrix.torch-spec }}
pip install -r dev-requirements.txt
Empty file removed test/__init__.py
3 changes: 0 additions & 3 deletions test/dtypes/test_affine_quantized.py
@@ -90,7 +90,6 @@ def test_tensor_core_layout_transpose(self):
aqt_shape = aqt.shape
self.assertEqual(aqt_shape, shape)

@skip_if_rocm("ROCm development in progress")
@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
@common_utils.parametrize(
"apply_quant", get_quantization_functions(True, True, "cuda", True)
@@ -170,7 +169,6 @@ def apply_uint6_weight_only_quant(linear):

deregister_aqt_quantized_linear_dispatch(dispatch_condition)

@skip_if_rocm("ROCm development in progress")
@common_utils.parametrize("apply_quant", get_quantization_functions(True, True))
@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
def test_print_quantized_module(self, apply_quant):
@@ -183,7 +181,6 @@ class TestAffineQuantizedBasic(TestCase):
COMMON_DEVICES = ["cpu"] + (["cuda"] if torch.cuda.is_available() else [])
COMMON_DTYPES = [torch.bfloat16]

@skip_if_rocm("ROCm development in progress")
@common_utils.parametrize("device", COMMON_DEVICES)
@common_utils.parametrize("dtype", COMMON_DTYPES)
def test_flatten_unflatten(self, device, dtype):
1 change: 0 additions & 1 deletion test/dtypes/test_floatx.py
@@ -108,7 +108,6 @@ def test_to_copy_device(self, ebits, mbits):
@parametrize("ebits,mbits", _Floatx_DTYPES)
@parametrize("bias", [False, True])
@parametrize("dtype", [torch.half, torch.bfloat16])
@skip_if_rocm("ROCm development in progress")
@unittest.skipIf(is_fbcode(), reason="broken in fbcode")
def test_fpx_weight_only(self, ebits, mbits, bias, dtype):
N, OC, IC = 4, 256, 64
1 change: 0 additions & 1 deletion test/float8/test_base.py
@@ -424,7 +424,6 @@ def test_linear_from_config_params(
@pytest.mark.parametrize("x_shape", [(16, 16), (2, 16, 16), (3, 2, 16, 16)])
@pytest.mark.parametrize("linear_bias", [True, False])
@unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
@skip_if_rocm("ROCm development in progress")
def test_linear_from_recipe(
self,
recipe_name,
1 change: 0 additions & 1 deletion test/hqq/test_hqq_affine.py
@@ -111,7 +111,6 @@ def test_hqq_plain_5bit(self):
ref_dot_product_error=0.000704,
)

@skip_if_rocm("ROCm development in progress")
def test_hqq_plain_4bit(self):
self._test_hqq(
dtype=torch.uint4,
7 changes: 1 addition & 6 deletions test/integration/test_integration.py
@@ -570,7 +570,6 @@ def test_per_token_linear_cpu(self):
self._test_per_token_linear_impl("cpu", dtype)

@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
@skip_if_rocm("ROCm development in progress")
def test_per_token_linear_cuda(self):
for dtype in (torch.float32, torch.float16, torch.bfloat16):
self._test_per_token_linear_impl("cuda", dtype)
@@ -689,7 +688,6 @@ def test_dequantize_int8_weight_only_quant_subclass(self, device, dtype):
@parameterized.expand(COMMON_DEVICE_DTYPE)
@unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
# @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
@skip_if_rocm("ROCm development in progress")
def test_dequantize_int4_weight_only_quant_subclass(self, device, dtype):
if device == "cpu":
self.skipTest(f"Temporarily skipping for {device}")
@@ -709,7 +707,6 @@ def test_dequantize_int4_weight_only_quant_subclass(self, device, dtype):
@parameterized.expand(COMMON_DEVICE_DTYPE)
@unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
# @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
@skip_if_rocm("ROCm development in progress")
def test_dequantize_int4_weight_only_quant_subclass_grouped(self, device, dtype):
if device == "cpu":
self.skipTest(f"Temporarily skipping for {device}")
@@ -903,7 +900,6 @@ def test_aq_float8_dynamic_quant_tensorwise_scaling_subclass(self, device, dtype
@parameterized.expand(COMMON_DEVICE_DTYPE)
@unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
# @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
@skip_if_rocm("ROCm development in progress")
def test_int4_weight_only_quant_subclass(self, device, dtype):
if device == "cpu":
self.skipTest(f"Temporarily skipping for {device}")
@@ -923,7 +919,6 @@ def test_int4_weight_only_quant_subclass(self, device, dtype):
@parameterized.expand(COMMON_DEVICE_DTYPE)
@unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.")
# @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now")
@skip_if_rocm("ROCm development in progress")
def test_int4_weight_only_quant_subclass_grouped(self, device, dtype):
if dtype != torch.bfloat16:
self.skipTest(f"Fails for {dtype}")
@@ -1827,7 +1822,7 @@ def test_autoquant_int4wo(self, device, dtype):
self.assertGreater(compute_error(ref, out), 20)

@parameterized.expand(COMMON_DEVICE_DTYPE)
@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
@unittest.skipIf(not is_sm_at_least_90(), "Need cuda arch greater than SM90")
@unittest.skipIf(
not TORCH_VERSION_AT_LEAST_2_5, "autoquant int4 option requires 2.5+."
)
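Note the last change above: the autoquant int4 test now gates on compute capability rather than bare CUDA availability, so it only runs on SM90-class (Hopper) hardware. is_sm_at_least_90 is presumably a torchao.utils helper alongside TORCH_VERSION_AT_LEAST_2_5; a rough sketch of what such a check typically does, offered as an assumption rather than the actual source:

import torch

def is_sm_at_least_90() -> bool:
    # Hypothetical sketch: true only when a CUDA device with compute
    # capability >= 9.0 (SM90, i.e. Hopper) is visible. Device capability
    # is reported as a (major, minor) tuple, so tuple comparison suffices.
    return torch.cuda.is_available() and torch.cuda.get_device_capability() >= (9, 0)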
1 change: 0 additions & 1 deletion test/kernel/test_galore_downproj.py
@@ -30,7 +30,6 @@

@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU")
@pytest.mark.parametrize("M, N, rank, allow_tf32, fp8_fast_accum, dtype", TEST_CONFIGS)
@skip_if_rocm("ROCm development in progress")
def test_galore_downproj(M, N, rank, allow_tf32, fp8_fast_accum, dtype):
torch.backends.cuda.matmul.allow_tf32 = allow_tf32
MAX_DIFF = MAX_DIFF_tf32 if allow_tf32 else MAX_DIFF_no_tf32
1 change: 0 additions & 1 deletion test/prototype/test_awq.py
@@ -117,7 +117,6 @@ def test_awq_loading(device, qdtype):

@pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5, reason="requires nightly pytorch")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
@skip_if_rocm("ROCm development in progress")
def test_save_weights_only():
dataset_size = 100
l1, l2, l3 = 512, 256, 128
1 change: 0 additions & 1 deletion test/prototype/test_low_bit_optim.py
@@ -113,7 +113,6 @@ class TestOptim(TestCase):
)
@parametrize("dtype", [torch.float32, torch.bfloat16])
@parametrize("device", _DEVICES)
@skip_if_rocm("ROCm development in progress")
def test_optim_smoke(self, optim_name, dtype, device):
if optim_name.endswith("Fp8") and device == "cuda":
if not TORCH_VERSION_AT_LEAST_2_4:
3 changes: 1 addition & 2 deletions test/prototype/test_splitk.py
@@ -13,14 +13,13 @@
except ImportError:
triton_available = False

from torchao.utils import skip_if_compute_capability_less_than, skip_if_rocm

from torchao.utils import skip_if_compute_capability_less_than, skip_if_rocm

@unittest.skipIf(not triton_available, "Triton is required but not available")
@unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
class TestFP8Gemm(TestCase):
@skip_if_compute_capability_less_than(9.0)
@skip_if_rocm("ROCm development in progress")
def test_gemm_split_k(self):
dtype = torch.float16
qdtype = torch.float8_e4m3fn
1 change: 0 additions & 1 deletion test/quantization/test_galore_quant.py
@@ -83,7 +83,6 @@ def test_galore_quantize_blockwise(dim1, dim2, dtype, signed, blocksize):
"dim1,dim2,dtype,signed,blocksize",
TEST_CONFIGS,
)
@skip_if_rocm("ROCm development in progress")
def test_galore_dequant_blockwise(dim1, dim2, dtype, signed, blocksize):
g = torch.randn(dim1, dim2, device="cuda", dtype=dtype) * 0.01

2 changes: 0 additions & 2 deletions test/quantization/test_marlin_qqq.py
@@ -45,7 +45,6 @@ def setUp(self):
)

@pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
@skip_if_rocm("ROCm development in progress")
def test_marlin_qqq(self):
output_ref = self.model(self.input)
for group_size in [-1, 128]:
@@ -67,7 +66,6 @@ def test_marlin_qqq(self):

@pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5, reason="Needs PyTorch 2.5+")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
@skip_if_rocm("ROCm development in progress")
def test_marlin_qqq_compile(self):
model_copy = copy.deepcopy(self.model)
model_copy.forward = torch.compile(model_copy.forward, fullgraph=True)
3 changes: 1 addition & 2 deletions test/sparsity/test_marlin.py
@@ -37,7 +37,6 @@ def setUp(self):
)

@pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
@skip_if_rocm("ROCm development in progress")
def test_quant_sparse_marlin_layout_eager(self):
apply_fake_sparsity(self.model)
model_copy = copy.deepcopy(self.model)
@@ -49,13 +48,13 @@ def test_quant_sparse_marlin_layout_eager(self):
# Sparse + quantized
quantize_(self.model, int4_weight_only(layout=MarlinSparseLayout()))
sparse_result = self.model(self.input)

assert torch.allclose(
dense_result, sparse_result, atol=3e-1
), "Results are not close"

@pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5, reason="Needs PyTorch 2.5+")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Need CUDA available")
@skip_if_rocm("ROCm development in progress")
def test_quant_sparse_marlin_layout_compile(self):
apply_fake_sparsity(self.model)
model_copy = copy.deepcopy(self.model)
3 changes: 0 additions & 3 deletions test/test_ops.py
@@ -19,9 +19,6 @@
from torchao.sparsity.marlin import inject_24, marlin_24_workspace, pack_to_marlin_24
from torchao.utils import TORCH_VERSION_AT_LEAST_2_5, compute_max_diff, is_fbcode

if torch.version.hip is not None:
pytest.skip("Skipping the test in ROCm", allow_module_level=True)

if is_fbcode():
pytest.skip(
"Skipping the test in fbcode since we don't have TARGET file for kernels"
3 changes: 0 additions & 3 deletions test/test_s8s4_linear_cutlass.py
@@ -7,9 +7,6 @@
from torchao.quantization.utils import group_quantize_tensor_symmetric
from torchao.utils import compute_max_diff

if torch.version.hip is not None:
pytest.skip("Skipping the test in ROCm", allow_module_level=True)

S8S4_LINEAR_CUTLASS_DTYPE = [torch.float16, torch.bfloat16]
S8S4_LINEAR_CUTLASS_BATCH_SIZE = [1, 4, 8, 16, 32, 64]
S8S4_LINEAR_CUTLASS_SIZE_MNK = [
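This file and test/test_ops.py above drop the same module-level ROCm guard. For reference, a minimal self-contained sketch of the removed idiom; because the guard runs at import time, pytest skips the whole file during collection on ROCm builds:

import pytest
import torch

# torch.version.hip is a version string on ROCm/HIP builds of PyTorch and
# None on CUDA or CPU-only builds, so this skips every test in the module
# before any of them is collected.
if torch.version.hip is not None:
    pytest.skip("Skipping the test in ROCm", allow_module_level=True)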
29 changes: 0 additions & 29 deletions test/test_utils.py
@@ -1,40 +1,11 @@
import functools
import unittest
from unittest.mock import patch

import pytest
import torch

from torchao.utils import TorchAOBaseTensor, torch_version_at_least


def skip_if_rocm(message=None):
"""Decorator to skip tests on ROCm platform with custom message.
Args:
message (str, optional): Additional information about why the test is skipped.
"""

def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if torch.version.hip is not None:
skip_message = "Skipping the test in ROCm"
if message:
skip_message += f": {message}"
pytest.skip(skip_message)
return func(*args, **kwargs)

return wrapper

# Handle both @skip_if_rocm and @skip_if_rocm() syntax
if callable(message):
func = message
message = None
return decorator(func)
return decorator


class TestTorchVersionAtLeast(unittest.TestCase):
def test_torch_version_at_least(self):
test_cases = [
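The removed skip_if_rocm helper took an optional message and, through its final callable(message) branch, worked both as a bare decorator and as a decorator factory. A minimal usage sketch with illustrative test names; note that test/prototype/test_splitk.py above now imports skip_if_rocm from torchao.utils, which suggests the helper moved there rather than being dropped outright:

# Bare form: `message` receives the test function itself, so the
# callable(message) branch applies the decorator with the default reason.
@skip_if_rocm
def test_bare():
    ...

# Factory form: the string is appended to "Skipping the test in ROCm".
@skip_if_rocm("ROCm development in progress")
def test_with_reason():
    ...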