Commit
use bitshifting for power of 2 rounding
danielvegamyhre committed Feb 6, 2025
1 parent 34cc033 commit ab93e18
Showing 2 changed files with 7 additions and 3 deletions.
1 change: 1 addition & 0 deletions torchao/float8/float8_scaling_utils.py
@@ -27,6 +27,7 @@
 )


+# TODO(danielvegamyhre): refactor to accept Float8LinearConfig directly
 def hp_tensor_to_float8_dynamic(
     hp_tensor: torch.Tensor,
     float8_dtype: torch.dtype,
9 changes: 6 additions & 3 deletions torchao/float8/float8_utils.py
@@ -45,12 +45,15 @@ def amax_to_scale(
     amax = amax.to(torch.float64)
     if float8_dtype in FP8_TYPES:
         res = torch.finfo(float8_dtype).max / torch.clamp(amax, min=EPS)
+        res = res.to(torch.float32)
     else:
         raise ValueError(f"Unsupported float8_dtype: {float8_dtype}")
     if round_scales_to_power_of_2:
-        # rounds down to the nearest power of 2.
-        res = torch.exp2(torch.floor(torch.log2(res)))
-    return res.to(torch.float32)
+        # rounds down to the nearest power of 2
+        res = res.view(torch.int32)
+        res = (res >> 23) << 23
+        res = res.view(torch.float32)
+    return res


 @torch.no_grad()
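For context, here is a minimal sketch (not part of this commit) of why the bit shift rounds down to the nearest power of 2: an IEEE-754 float32 stores 1 sign bit, 8 exponent bits, and 23 mantissa bits, so viewing the value as int32 and shifting right then left by 23 clears the mantissa and keeps only the sign and exponent, i.e. the largest power of 2 not exceeding the input (for positive, finite, normal values such as float8 scales). This is also why res is now cast to float32 before the rounding branch: the trick assumes a float32 bit layout. The helper name below is hypothetical.

import torch

def round_down_to_power_of_2(x: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper mirroring the bit trick in this commit; assumes float32 input.
    assert x.dtype == torch.float32, "the bit trick assumes a float32 bit layout"
    bits = x.view(torch.int32)       # reinterpret the raw bits, no data copy
    bits = (bits >> 23) << 23        # zero out the 23-bit mantissa
    return bits.view(torch.float32)  # reinterpret back as float32

x = torch.tensor([0.3, 1.0, 5.0, 100.0], dtype=torch.float32)
# Matches the exp2/floor/log2 formulation this commit replaces (0.25, 1.0, 4.0, 64.0):
assert torch.equal(round_down_to_power_of_2(x), torch.exp2(torch.floor(torch.log2(x))))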
