Skip to content

Commit 89e365a

Browse files
committed
feat(library): add quantize_symmetric op
1 parent d443109 commit 89e365a

File tree

4 files changed

+49
-0
lines changed

4 files changed

+49
-0
lines changed

quanto/library/ops.py

+1
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,5 @@ def impl(*args, **kwargs):
5353
return getattr(torch.ops.quanto_py, name)(*args, **kwargs)
5454

5555

56+
define("quantize_symmetric", "(Tensor self, Tensor scale, ScalarType dtype) -> Tensor")
5657
define("unpack", "(Tensor self, int bits) -> Tensor")

quanto/library/python/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
from .quantize import *
12
from .unpack import *

quanto/library/python/quantize.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import torch
2+
3+
4+
def dtype_info(dtype):
    """Return the numeric-limits object (``torch.finfo`` or ``torch.iinfo``) for *dtype*."""
    if dtype.is_floating_point:
        return torch.finfo(dtype)
    return torch.iinfo(dtype)
7+
8+
9+
@torch.library.impl("quanto_py::quantize_symmetric", "default")
def quantize_symmetric(t: torch.Tensor, scale: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    """Quantize a float tensor to *dtype* using a symmetric scale.

    Args:
        t: the float tensor to quantize.
        scale: the quantization scale(s); must broadcast against ``t``
            (a scalar for per-tensor, or one value per axis for per-axis).
        dtype: the destination dtype (an integer dtype or a float8 dtype).

    Returns:
        The quantized tensor, of dtype ``dtype``.
    """
    # NOTE: the annotation was ``torch.Tensor.dtype`` (an attribute descriptor,
    # not a type); ``torch.dtype`` is the actual type of this argument.
    info = dtype_info(dtype)
    data = t / scale
    # Integer destinations need explicit rounding; float destinations rely on
    # the rounding performed by the final cast.
    if not dtype.is_floating_point:
        data = torch.round(data)
    # Saturate to the destination range before casting to avoid overflow.
    return torch.clamp(data, min=info.min, max=info.max).to(dtype)

test/library/test_quantize.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import pytest
2+
import torch
3+
from helpers import random_tensor
4+
5+
6+
@pytest.mark.parametrize("shape", [(12,), (32, 32)], ids=["vector", "matrix"])
@pytest.mark.parametrize("src_dtype", [torch.float32, torch.float16], ids=["fp32", "fp16"])
@pytest.mark.parametrize("dst_dtype", [torch.int8, torch.float8_e4m3fn], ids=["int8", "float8"])
@pytest.mark.parametrize("per_axis", [True, False], ids=["per-axis", "per-tensor"])
def test_quantize_symmetric(shape, src_dtype, dst_dtype, per_axis, device):
    """Round-trip check: dequantize crafted data, then expect quantization to recover it."""
    if device.type == "mps" and dst_dtype != torch.int8:
        pytest.skip("float8 types are not supported on MPS device")
    # Craft reference quantized values directly in the destination dtype.
    if dst_dtype.is_floating_point:
        expected = random_tensor(shape, torch.float16).to(dst_dtype).to(device)
    else:
        expected = torch.randint(-127, 127, shape, dtype=dst_dtype).to(device)
    # Per-axis: one scale per leading-dim entry (broadcastable); per-tensor: a scalar.
    scale_shape = (shape[0],) + (1,) * (len(shape) - 1) if per_axis else ()
    scale = torch.rand(scale_shape, dtype=src_dtype).to(device)
    # Dequantize so the op receives a float input whose exact quantization is known.
    t = expected.to(src_dtype) * scale
    qdata = torch.ops.quanto.quantize_symmetric(t, scale, dst_dtype)
    assert qdata.dtype == dst_dtype
    assert qdata.shape == shape
    # float8 tensors direct comparison is not supported yet on CPU
    if dst_dtype.is_floating_point:
        assert torch.equal(qdata.to(torch.float16), expected.to(torch.float16))
    else:
        assert torch.equal(qdata, expected)

0 commit comments

Comments
 (0)