From 1e7ed466ebfcc4fb9f56485a5c7972493989b603 Mon Sep 17 00:00:00 2001 From: "bobrenjc93 (Meta Employee)" Date: Tue, 7 Jan 2025 11:04:27 -0800 Subject: [PATCH 1/7] Migrate from Tuple -> tuple in benchmarks (#144259) Summary: X-link: https://github.com/pytorch/pytorch/pull/144259 Approved by: https://github.com/yanboliang Reviewed By: clee2000 Differential Revision: D67905636 Pulled By: bobrenjc93 fbshipit-source-id: adaec218cc88d0a5d63f0d41058076a3ea33c64b --- userbenchmark/dynamo/dynamobench/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/userbenchmark/dynamo/dynamobench/common.py b/userbenchmark/dynamo/dynamobench/common.py index e0aae9cdae..086cd68dc9 100644 --- a/userbenchmark/dynamo/dynamobench/common.py +++ b/userbenchmark/dynamo/dynamobench/common.py @@ -32,7 +32,6 @@ NamedTuple, Optional, Sequence, - Tuple, Type, TYPE_CHECKING, ) @@ -746,7 +745,7 @@ def timed( return (time_total, result) if return_result else time_total -def _normalize_bench_inputs(example_inputs) -> Tuple[Tuple[Any], Mapping[str, Any]]: +def _normalize_bench_inputs(example_inputs) -> tuple[tuple[Any], Mapping[str, Any]]: # NOTE(bowbao): For huggingface benchmark, example_inputs are formatted as dictionary, # and consumed like `model(**example_inputs)`. # For other benchmarks, example_inputs are formatted as tuple and consumed From 31cc1a499786be5d6e309334de8263c3834b2749 Mon Sep 17 00:00:00 2001 From: "Oguz Ulgen (Meta Employee)" Date: Wed, 8 Jan 2025 09:46:34 -0800 Subject: [PATCH 2/7] Introduce cache hot loading APIs (a.k.a. "Mega-cache") (#143341) Summary: This PR essentially introduces two new APIs * torch.compiler.save_cache_artifacts * torch.compiler.load_cache_artifacts which aim to create a mega cache experience where the user can start collecting cache artifacts, and later call the save API to fetch them. In the next attempt, the user can "hot load" the cache artifacts via the load function. This bundling approach reduces the need to rely on porting individual files one by one, or relying on many network requests. Note that these APIs CANNOT log to structured logging as these functions will be called before and after compilation, as opposed to during compilation. Due to this limitation, the API returns a struct that the user can log with. 
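A minimal usage sketch of the two APIs (only the function names come from this summary; the exact signatures and return shapes below are assumptions, not part of this patch):

```python
import torch

model = torch.nn.Linear(8, 8)           # any module works; a tiny one keeps the sketch self-contained
example_input = torch.randn(2, 8)

# Run 1: compile as usual, then collect whatever cache artifacts compilation produced.
compiled = torch.compile(model)
compiled(example_input)
serialized, cache_info = torch.compiler.save_cache_artifacts()  # assumed: serialized bytes + a loggable info struct

# Run 2 (typically a fresh process): hot-load the bundled artifacts before compiling
# again, so the caches are pre-populated instead of being ported file by file or
# fetched through many network requests.
torch.compiler.load_cache_artifacts(serialized)  # assumed: accepts the bytes returned by the save call
compiled = torch.compile(model)
compiled(example_input)
```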
X-link: https://github.com/pytorch/pytorch/pull/143341 Approved by: https://github.com/jansel Reviewed By: clee2000 Differential Revision: D67927135 Pulled By: oulgen fbshipit-source-id: 00c4f3955bd098a61b40760a9a29cdf58caf04cf --- userbenchmark/dynamo/dynamobench/_dynamo/testing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/userbenchmark/dynamo/dynamobench/_dynamo/testing.py b/userbenchmark/dynamo/dynamobench/_dynamo/testing.py index d401c83f0a..3f5dd0255b 100644 --- a/userbenchmark/dynamo/dynamobench/_dynamo/testing.py +++ b/userbenchmark/dynamo/dynamobench/_dynamo/testing.py @@ -255,6 +255,11 @@ def __call__( self.graphs.append(gm) return lookup_backend(self.backend)(gm, example_inputs) + def clear(self) -> None: + self.frame_count = 0 + self.op_count = 0 + self.graphs = [] + # Equivalent to backend="eager", but also records graphs that # we can assert on From d15a3cc5a5546b7bdc146582dba7c832ecd815d5 Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Wed, 8 Jan 2025 14:09:54 -0800 Subject: [PATCH 3/7] Remove unnecessary copy of gradients in util (#144329) Summary: No need to copy gradients to CPU too X-link: https://github.com/pytorch/pytorch/pull/144329 Approved by: https://github.com/awgu, https://github.com/cyyever Reviewed By: clee2000 Differential Revision: D67943148 fbshipit-source-id: dea67477dbc0a791cf4a8c72ca7c9e4d4381cc54 --- userbenchmark/dynamo/dynamobench/_dynamo/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py index 12a9376d70..5cbba76a9a 100644 --- a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py +++ b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py @@ -2492,7 +2492,7 @@ def to_tensor(t): return True score = torch.nn.functional.cosine_similarity(ref, res, dim=0, eps=1e-6) if score < 0.99: - log.warning("Similarity score=%s", score.cpu().detach().item()) + log.warning("Similarity score=%s", score.detach().cpu().item()) return score >= 0.99 else: if not exact_dtype: From cfe2709e1ecc16bc27caf81906304699b486dc02 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Wed, 8 Jan 2025 21:10:41 -0800 Subject: [PATCH 4/7] fix ruff rule E226: add missing whitespace around operator in f-strings (#144415) Summary: The fixes are generated by: ```bash ruff check --fix --preview --unsafe-fixes --select=E226 . 
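# E226 is the pycodestyle/ruff rule for missing whitespace around arithmetic
# operators; --fix with --unsafe-fixes lets ruff rewrite the offending f-strings
# in place (e.g. 100*pass_rate -> 100 * pass_rate in the hunks below).
# lintrunner then re-applies the RUFF and PYFMT linters across all files: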
lintrunner -a --take "RUFF,PYFMT" --all-files ``` X-link: https://github.com/pytorch/pytorch/pull/144415 Approved by: https://github.com/huydhn, https://github.com/Skylion007 Reviewed By: clee2000 Differential Revision: D67957574 fbshipit-source-id: 176af7b581aa8686d72a533af9cfe91a7728ac75 --- userbenchmark/dynamo/dynamobench/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/userbenchmark/dynamo/dynamobench/common.py b/userbenchmark/dynamo/dynamobench/common.py index 086cd68dc9..c1d11c596b 100644 --- a/userbenchmark/dynamo/dynamobench/common.py +++ b/userbenchmark/dynamo/dynamobench/common.py @@ -676,7 +676,7 @@ def print_summary_table(data, print_dataframe=False): print(col.ljust(width), f"mean={data[col].mean():.3f}x") elif col in ("accuracy"): pass_rate = (data[col] == "pass").mean() - print(col.ljust(width), f"pass_rate={100*pass_rate:.2f}%") + print(col.ljust(width), f"pass_rate={100 * pass_rate:.2f}%") else: cdata = data[col] print( @@ -4993,7 +4993,7 @@ def detect_and_mark_batch(t): for i, name in enumerate(model_names): current_name = name if args.progress: - print(f"Running model {i+1}/{nmodels}", flush=True) + print(f"Running model {i + 1}/{nmodels}", flush=True) try: timeout = args.timeout From af94f0e64e5d0fe82c0c2acead6d74bfbeb56692 Mon Sep 17 00:00:00 2001 From: Sergii Dymchenko Date: Thu, 9 Jan 2025 14:19:28 -0800 Subject: [PATCH 5/7] Update upload-artifact action to v4 (#2564) Summary: Pull Request resolved: https://github.com/pytorch/benchmark/pull/2564 Reviewed By: huydhn Differential Revision: D67988759 Pulled By: kit1980 fbshipit-source-id: 48bf851d11b3f8d6e6b912d3f0d813c5387a8433 --- .github/workflows/userbenchmark-a100-bisection.yml | 2 +- .github/workflows/userbenchmark-a100-release.yml | 2 +- .github/workflows/userbenchmark-a100.yml | 2 +- .github/workflows/userbenchmark-ai-cluster.yml | 2 +- .github/workflows/userbenchmark-c5-24xlarge.yml | 2 +- .github/workflows/userbenchmark-ibmcloud-testrunner.yml | 4 ++-- .github/workflows/userbenchmark-regression-detector.yml | 2 +- .github/workflows/userbenchmark-t4-metal.yml | 2 +- .github/workflows/v2-bisection.yml | 2 +- .github/workflows/v2-nightly.yml | 2 +- .github/workflows/v3-bisection.yml | 2 +- .github/workflows/v3-nightly.yml | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/userbenchmark-a100-bisection.yml b/.github/workflows/userbenchmark-a100-bisection.yml index e133f194c8..d5f9133894 100644 --- a/.github/workflows/userbenchmark-a100-bisection.yml +++ b/.github/workflows/userbenchmark-a100-bisection.yml @@ -90,7 +90,7 @@ jobs: cp -r "${BISECT_WORKDIR}" ../bisection-result - name: Upload artifact if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Bisection result path: bisection-result/ diff --git a/.github/workflows/userbenchmark-a100-release.yml b/.github/workflows/userbenchmark-a100-release.yml index 90333dddc6..42f47b7bab 100644 --- a/.github/workflows/userbenchmark-a100-release.yml +++ b/.github/workflows/userbenchmark-a100-release.yml @@ -43,7 +43,7 @@ jobs: cp -r ./.userbenchmark/release-test ../benchmark-output - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench result path: benchmark-output/ diff --git a/.github/workflows/userbenchmark-a100.yml b/.github/workflows/userbenchmark-a100.yml index 70f8235efe..da542ec605 100644 --- a/.github/workflows/userbenchmark-a100.yml +++ b/.github/workflows/userbenchmark-a100.yml @@ -59,7 +59,7 
@@ jobs: cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" ../benchmark-output fi - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench result path: benchmark-output/ diff --git a/.github/workflows/userbenchmark-ai-cluster.yml b/.github/workflows/userbenchmark-ai-cluster.yml index 0991c40f72..777f94a7ed 100644 --- a/.github/workflows/userbenchmark-ai-cluster.yml +++ b/.github/workflows/userbenchmark-ai-cluster.yml @@ -42,7 +42,7 @@ jobs: python ./.github/scripts/userbenchmark/schedule-benchmarks.py --platform ${PLATFORM_NAME} cp -r ./.userbenchmark ../benchmark-output - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench result path: benchmark-output/ diff --git a/.github/workflows/userbenchmark-c5-24xlarge.yml b/.github/workflows/userbenchmark-c5-24xlarge.yml index 0824197fd9..c7d0ce15cd 100644 --- a/.github/workflows/userbenchmark-c5-24xlarge.yml +++ b/.github/workflows/userbenchmark-c5-24xlarge.yml @@ -89,7 +89,7 @@ jobs: python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}" done - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench result path: benchmark-output/ diff --git a/.github/workflows/userbenchmark-ibmcloud-testrunner.yml b/.github/workflows/userbenchmark-ibmcloud-testrunner.yml index 7a94d5a150..ec894c551b 100644 --- a/.github/workflows/userbenchmark-ibmcloud-testrunner.yml +++ b/.github/workflows/userbenchmark-ibmcloud-testrunner.yml @@ -1,4 +1,4 @@ -name: TorchBench Userbenchmark test on IBM Cloud +name: TorchBench Userbenchmark test on IBM Cloud on: workflow_dispatch: inputs: @@ -85,7 +85,7 @@ jobs: python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}" done - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench result path: benchmark-output/ diff --git a/.github/workflows/userbenchmark-regression-detector.yml b/.github/workflows/userbenchmark-regression-detector.yml index 5e1f7fcc04..7fbffa0d4a 100644 --- a/.github/workflows/userbenchmark-regression-detector.yml +++ b/.github/workflows/userbenchmark-regression-detector.yml @@ -95,7 +95,7 @@ jobs: python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}" done - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench result path: benchmark-output/ diff --git a/.github/workflows/userbenchmark-t4-metal.yml b/.github/workflows/userbenchmark-t4-metal.yml index 6748253d6e..d0b1f4a277 100644 --- a/.github/workflows/userbenchmark-t4-metal.yml +++ b/.github/workflows/userbenchmark-t4-metal.yml @@ -79,7 +79,7 @@ jobs: cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" ../benchmark-output fi - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench result path: benchmark-output/ diff --git a/.github/workflows/v2-bisection.yml b/.github/workflows/v2-bisection.yml index 75ace9d7d6..fedf4d77dd 100644 --- a/.github/workflows/v2-bisection.yml +++ b/.github/workflows/v2-bisection.yml @@ -59,7 +59,7 @@ jobs: labels: | torchbench-perf-report - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Bisection result path: 
bisection-result/ diff --git a/.github/workflows/v2-nightly.yml b/.github/workflows/v2-nightly.yml index 8fc40295f5..9833245cca 100644 --- a/.github/workflows/v2-nightly.yml +++ b/.github/workflows/v2-nightly.yml @@ -104,7 +104,7 @@ jobs: # Upload result to Scribe python scripts/upload_scribe_${CONFIG_VER}.py --pytest_bench_json "${LATEST_RESULT}" --torchbench_score_file "${SCORE_FILE}" - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Benchmark result path: benchmark-output/ diff --git a/.github/workflows/v3-bisection.yml b/.github/workflows/v3-bisection.yml index e68cbee809..5ee412a24f 100644 --- a/.github/workflows/v3-bisection.yml +++ b/.github/workflows/v3-bisection.yml @@ -88,7 +88,7 @@ jobs: torchbench-perf-report - name: Upload artifact if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Bisection result path: bisection-result/ diff --git a/.github/workflows/v3-nightly.yml b/.github/workflows/v3-nightly.yml index 3b512693cc..66ffd0b9ee 100644 --- a/.github/workflows/v3-nightly.yml +++ b/.github/workflows/v3-nightly.yml @@ -90,7 +90,7 @@ jobs: https://api.github.com/repos/pytorch/benchmark/actions/workflows/57994037/dispatches \ -d '{"ref": "main", "inputs": {"regression_date": "${{ env.TORCHBENCH_REGRESSION_DETECTED }}" } }' - name: Upload result to GH Actions Artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: TorchBench V3 result path: benchmark-output/ From e7f85d274abc47d47ec63670189809593f229052 Mon Sep 17 00:00:00 2001 From: "Colin L. Rice (Meta Employee)" Date: Thu, 9 Jan 2025 19:43:10 -0800 Subject: [PATCH 6/7] easy: sort dictionary keys for inductor config when publishing (#143307) Summary: This means we should get consistent logging strings for the same config on different ranks X-link: https://github.com/pytorch/pytorch/pull/143307 Approved by: https://github.com/xmfan Reviewed By: clee2000 Differential Revision: D67985940 Pulled By: c00w fbshipit-source-id: 0754fcc6d49b5768ffc34f12b1e693c65624cda8 --- userbenchmark/dynamo/dynamobench/_dynamo/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py index 5cbba76a9a..d5c2206392 100644 --- a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py +++ b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py @@ -1241,7 +1241,10 @@ def default(self, o): del inductor_config_copy[key] # Stringify Inductor config inductor_conf_str = json.dumps( - inductor_config_copy, cls=TypeSafeSerializer, skipkeys=True + inductor_config_copy, + cls=TypeSafeSerializer, + skipkeys=True, + sort_keys=True, ) except Exception: # Don't crash because of runtime logging errors From 9b7cac9decaf16383d13a5dd0d28d38029fda5d2 Mon Sep 17 00:00:00 2001 From: "bobrenjc93 (Meta Employee)" Date: Fri, 10 Jan 2025 21:21:50 -0800 Subject: [PATCH 7/7] Migrate from Tuple -> tuple in torch/_dynamo (#144261) Summary: X-link: https://github.com/pytorch/pytorch/pull/144261 Approved by: https://github.com/aorenste, https://github.com/zou3519 Reviewed By: clee2000 Differential Revision: D68023204 Pulled By: bobrenjc93 fbshipit-source-id: 2f8a435a35083bc00bc6edb2208223364a3c47a6 --- .../dynamo/dynamobench/_dynamo/testing.py | 3 +-- .../dynamo/dynamobench/_dynamo/utils.py | 21 +++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/userbenchmark/dynamo/dynamobench/_dynamo/testing.py 
b/userbenchmark/dynamo/dynamobench/_dynamo/testing.py index 3f5dd0255b..04aa008446 100644 --- a/userbenchmark/dynamo/dynamobench/_dynamo/testing.py +++ b/userbenchmark/dynamo/dynamobench/_dynamo/testing.py @@ -16,7 +16,6 @@ Optional, overload, Sequence, - Tuple, TypeVar, Union, ) @@ -141,7 +140,7 @@ def reduce_to_scalar_loss(out: torch.Tensor) -> torch.Tensor: @overload def reduce_to_scalar_loss( - out: Union[List[Any], Tuple[Any, ...], Dict[Any, Any]] + out: Union[List[Any], tuple[Any, ...], Dict[Any, Any]] ) -> float: ... diff --git a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py index d5c2206392..bac180e137 100644 --- a/userbenchmark/dynamo/dynamobench/_dynamo/utils.py +++ b/userbenchmark/dynamo/dynamobench/_dynamo/utils.py @@ -54,7 +54,6 @@ Optional, overload, Set, - Tuple, Type, TypeVar, Union, @@ -106,7 +105,7 @@ # NOTE: Make sure `NP_SUPPORTED_MODULES` and `NP_TO_TNP_MODULE` are in sync. if np: - NP_SUPPORTED_MODULES: Tuple[types.ModuleType, ...] = ( + NP_SUPPORTED_MODULES: tuple[types.ModuleType, ...] = ( np, np.fft, np.linalg, @@ -202,8 +201,8 @@ def log(cls): def tabulate( - rows: Union[List[Tuple[str, object]], List[List[object]]], - headers: Union[Tuple[str, ...], List[str]], + rows: Union[List[tuple[str, object]], List[List[object]]], + headers: Union[tuple[str, ...], List[str]], ) -> str: try: import tabulate @@ -590,7 +589,7 @@ def compile_times(repr: Literal["str"], aggregate: bool = False) -> str: @overload def compile_times( repr: Literal["csv"], aggregate: bool = False -) -> Tuple[List[str], List[object]]: +) -> tuple[List[str], List[object]]: ... @@ -658,7 +657,7 @@ def __init__(self, maxsize: int = 4096) -> None: def reset(self): self.set = OrderedDict() - def add(self, key: Union[str, Tuple[object, object]]) -> bool: + def add(self, key: Union[str, tuple[object, object]]) -> bool: if key in self.set: self.set.move_to_end(key, last=True) if not config.verbose: @@ -797,7 +796,7 @@ def istype(obj: object, allowed_types: Type[T]) -> TypeIs[T]: @overload def istype( - obj: object, allowed_types: Tuple[Type[List[T]], Type[Tuple[T, ...]]] + obj: object, allowed_types: tuple[Type[List[T]], Type[tuple[T, ...]]] ) -> TypeIs[T]: ... @@ -940,7 +939,7 @@ def is_numpy_ndarray(value): def istensor(obj): """Check of obj is a tensor""" - tensor_list: Tuple[type, ...] = ( + tensor_list: tuple[type, ...] = ( torch.Tensor, torch.nn.Parameter, *config.traceable_tensor_subclasses, @@ -1900,7 +1899,7 @@ def is_namedtuple_cls(cls): @functools.lru_cache(1) -def namedtuple_fields(cls) -> Tuple[str, ...]: +def namedtuple_fields(cls) -> tuple[str, ...]: """Get the fields of a namedtuple or a torch.return_types.* quasi-namedtuple""" if cls is slice: return ("start", "stop", "step") @@ -2188,7 +2187,7 @@ def tuple_iterator_getitem(it, index): iter_next = next -def normalize_range_iter(range_iter) -> Tuple[int, int, int]: +def normalize_range_iter(range_iter) -> tuple[int, int, int]: _, (range_obj,), maybe_idx = range_iter.__reduce__() # In 3.12+, `maybe_idx` could be None, and `range_obj.start` would've been # already incremented by the current index. @@ -3070,7 +3069,7 @@ def tensor_always_has_static_shape( tensor: Union[torch.Tensor, Any], is_tensor: bool, tensor_source: Source, -) -> Tuple[bool, Optional[TensorStaticReason]]: +) -> tuple[bool, Optional[TensorStaticReason]]: """ Given a tensor, source, and is_tensor flag, determine if a shape should be static.