Add device auto-discovery and cli option (#1787)
Signed-off-by: U. Artie Eoff <[email protected]>
uartie authored Feb 26, 2025
1 parent b936a89 commit 6512975
Showing 21 changed files with 104 additions and 64 deletions.
30 changes: 30 additions & 0 deletions conftest.py
@@ -1,9 +1,12 @@
import json
import logging
import os
from pathlib import Path

import pytest

import tests.utils as oh_testutils


BASELINE_DIRECTORY = Path(__file__).parent.resolve() / Path("tests") / Path("baselines") / Path("fixture")

@@ -89,6 +92,7 @@ def __str___(self):
def pytest_addoption(parser):
    parser.addoption("--token", action="store", default=None)
    parser.addoption("--rebase", action="store_true", help="rebase baseline references from current run")
    parser.addoption("--device", action="store", default=None)


@pytest.fixture
@@ -99,6 +103,32 @@ def token(request):
def pytest_sessionstart(session):
    session.stash["baseline"] = Baseline(session)

    # User command-line option takes highest priority
    if session.config.option.device is not None:
        device = str(session.config.option.device).lower()
    # User GAUDI2_CI environment variable takes second priority for backwards compatibility
    elif "GAUDI2_CI" in os.environ:
        device = "gaudi2" if os.environ["GAUDI2_CI"] == "1" else "gaudi1"
    # Try to automatically detect it
    else:
        import habana_frameworks.torch.hpu as torch_hpu

        name = torch_hpu.get_device_name().strip()
        if not name:
            raise RuntimeError("Expected a Gaudi device but did not detect one.")
        device = name.split()[-1].lower()

    # torch_hpu.get_device_name() returns GAUDI for G1
    if "gaudi" == device:
        # use "gaudi1" since this is used in tests, baselines, etc.
        device = "gaudi1"

    oh_testutils.OH_DEVICE_CONTEXT = device


def pytest_report_header():
    return [f"device context: {oh_testutils.OH_DEVICE_CONTEXT}"]


def pytest_sessionfinish(session):
    session.stash["baseline"].finalize()
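
With this change, the device context is resolved from three sources in priority order: the new --device option, the legacy GAUDI2_CI variable, and hardware auto-detection. A minimal sketch of the three invocation paths (the helper script and test selection are illustrative, assuming pytest runs from the repository root where this conftest.py lives):

import os
import pytest

# 1) Highest priority: pass the device explicitly via the new CLI option
pytest.main(["tests/test_examples.py", "--device", "gaudi2"])

# 2) Second priority: the legacy GAUDI2_CI variable, kept for backwards compatibility
os.environ["GAUDI2_CI"] = "1"
pytest.main(["tests/test_examples.py"])

# 3) Neither set: pytest_sessionstart auto-detects the device via
#    habana_frameworks.torch.hpu.get_device_name()
# (calling pytest.main repeatedly in one process is only for illustration)

Each session then reports the resolved value through pytest_report_header, so the chosen context shows up at the top of the run log.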
5 changes: 3 additions & 2 deletions tests/test_bnb_inference.py
@@ -14,18 +14,19 @@
# limitations under the License.

import copy
import os

import torch
from habana_frameworks.torch.hpu import wrap_in_hpu_graph
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

from optimum.habana.transformers import modeling_utils

from .utils import OH_DEVICE_CONTEXT


modeling_utils.adapt_transformers_to_gaudi()

assert os.environ.get("GAUDI2_CI", "0") == "1", "Execution does not support on Gaudi1"
assert OH_DEVICE_CONTEXT != "gaudi1", "Execution does not support on Gaudi1"

MODEL_ID = "meta-llama/Llama-3.2-1B"

5 changes: 3 additions & 2 deletions tests/test_bnb_qlora.py
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import subprocess

import pytest
@@ -24,10 +23,12 @@
from optimum.habana import GaudiConfig, GaudiTrainer, GaudiTrainingArguments
from optimum.habana.transformers import modeling_utils

from .utils import OH_DEVICE_CONTEXT


modeling_utils.adapt_transformers_to_gaudi()

assert os.environ.get("GAUDI2_CI", "0") == "1", "Execution does not support on Gaudi1"
assert OH_DEVICE_CONTEXT != "gaudi1", "Execution does not support on Gaudi1"
try:
    import sys

4 changes: 3 additions & 1 deletion tests/test_custom_file_input.py
@@ -8,10 +8,12 @@
import pytest
from transformers.testing_utils import slow

from .utils import OH_DEVICE_CONTEXT


PATH_TO_RESOURCES = Path(__file__).resolve().parent.parent / "tests/resource"

if os.environ.get("GAUDI2_CI", "0") == "1":
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    MODEL_FILE_OPTIONS_TO_TEST = {
        "bf16": [
            (
12 changes: 6 additions & 6 deletions tests/test_diffusers.py
@@ -125,12 +125,12 @@
from optimum.habana.utils import set_seed

from .clip_coco_utils import calculate_clip_score, download_files
from .utils import OH_DEVICE_CONTEXT


IS_GAUDI2 = os.environ.get("GAUDI2_CI", "0") == "1"
IS_GAUDI1 = bool("gaudi1" == OH_DEVICE_CONTEXT)


if IS_GAUDI2:
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    THROUGHPUT_BASELINE_BF16 = 1.086
    THROUGHPUT_BASELINE_AUTOCAST = 0.394
    TEXTUAL_INVERSION_THROUGHPUT = 131.7606336456344
@@ -1695,7 +1695,7 @@ def test_fused_qkv_projections(self):

    @slow
    @check_gated_model_access("stabilityai/stable-diffusion-3-medium-diffusers")
    @pytest.mark.skipif(not IS_GAUDI2, reason="does not fit into Gaudi1 memory")
    @pytest.mark.skipif(IS_GAUDI1, reason="does not fit into Gaudi1 memory")
    def test_sd3_inference(self):
        repo_id = "stabilityai/stable-diffusion-3-medium-diffusers"

Expand Down Expand Up @@ -5985,7 +5985,7 @@ def test_flux_prompt_embeds(self):
        assert max_diff < 1e-4

    @slow
    @pytest.mark.skipif(not IS_GAUDI2, reason="does not fit into Gaudi1 memory")
    @pytest.mark.skipif(IS_GAUDI1, reason="does not fit into Gaudi1 memory")
    def test_flux_inference(self):
        prompts = [
            "A cat holding a sign that says hello world",
Expand Down Expand Up @@ -6154,7 +6154,7 @@ def test_flux_prompt_embeds(self):

    @slow
    @check_gated_model_access("black-forest-labs/FLUX.1-dev")
    @pytest.mark.skipif(not IS_GAUDI2, reason="does not fit into Gaudi1 memory")
    @pytest.mark.skipif(IS_GAUDI1, reason="does not fit into Gaudi1 memory")
    def test_flux_img2img_inference(self):
        repo_id = "black-forest-labs/FLUX.1-dev"
        image_path = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"
8 changes: 3 additions & 5 deletions tests/test_encoder_decoder.py
@@ -1,5 +1,4 @@
import json
import os
import re
import subprocess
from pathlib import Path
@@ -9,6 +8,7 @@
import pytest

from .test_examples import ACCURACY_PERF_FACTOR, TIME_PERF_FACTOR
from .utils import OH_DEVICE_CONTEXT


MODELS_TO_TEST = {
@@ -88,12 +88,10 @@ def _run_test(
        with open(Path(tmp_dir) / "predict_results.json") as fp:
            results = json.load(fp)

        device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1"

        # Ensure performance requirements (throughput) are met
        self.baseline.assertRef(
            compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref,
            context=[device],
            context=[OH_DEVICE_CONTEXT],
            predict_samples_per_second=results["predict_samples_per_second"],
        )

@@ -103,7 +101,7 @@ def _run_test(
        accuracy_metric = "predict_bleu"
        self.baseline.assertRef(
            compare=lambda actual, ref: actual >= ACCURACY_PERF_FACTOR * ref,
            context=[device],
            context=[OH_DEVICE_CONTEXT],
            **{accuracy_metric: results[accuracy_metric]},
        )

5 changes: 3 additions & 2 deletions tests/test_examples.py
@@ -50,6 +50,7 @@
    MODELS_TO_TEST_FOR_SEQUENCE_CLASSIFICATION,
    MODELS_TO_TEST_FOR_SPEECH_RECOGNITION,
    MODELS_TO_TEST_MAPPING,
    OH_DEVICE_CONTEXT,
)


@@ -60,7 +61,7 @@
TIME_PERF_FACTOR = 1.05


IS_GAUDI2 = os.environ.get("GAUDI2_CI", "0") == "1"
IS_GAUDI2 = bool("gaudi2" == OH_DEVICE_CONTEXT)


def _get_supported_models_for_script(
@@ -439,7 +440,7 @@ def test(self):
            # Assess accuracy
            with open(Path(tmp_dir) / "accuracy_metrics.json") as fp:
                results = json.load(fp)
            baseline = 0.43 if os.environ.get("GAUDI2_CI", "0") == "1" else 0.42
            baseline = 0.43 if IS_GAUDI2 else 0.42
            self.assertGreaterEqual(results["accuracy"], baseline)
            return
        elif self.EXAMPLE_NAME == "run_clip":
5 changes: 3 additions & 2 deletions tests/test_feature_extraction.py
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
from unittest import TestCase

@@ -25,10 +24,12 @@

from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

from .utils import OH_DEVICE_CONTEXT


adapt_transformers_to_gaudi()

if os.environ.get("GAUDI2_CI", "0") == "1":
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    # Gaudi2 CI baselines
    LATENCY_GTE_SMALL_BF16_GRAPH_BASELINE = 0.6812
else:
10 changes: 4 additions & 6 deletions tests/test_fp8_examples.py
@@ -1,5 +1,4 @@
import json
import os
import re
import subprocess
from pathlib import Path
@@ -8,9 +7,10 @@
import pytest

from .test_examples import ACCURACY_PERF_FACTOR, TIME_PERF_FACTOR
from .utils import OH_DEVICE_CONTEXT


if os.environ.get("GAUDI2_CI", "0") == "1":
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    # Gaudi2 CI baselines
    MODELS_TO_TEST = {
        "fp8": [
@@ -109,17 +109,15 @@ def _test_fp8_train(
    with open(Path(tmp_dir) / "all_results.json") as fp:
        results = json.load(fp)

    device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1"

    # Ensure performance requirements (throughput) are met
    baseline.assertRef(
        compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref,
        context=[device],
        context=[OH_DEVICE_CONTEXT],
        train_samples_per_second=results["train_samples_per_second"],
    )
    baseline.assertRef(
        compare=lambda actual, ref: actual >= ACCURACY_PERF_FACTOR * ref,
        context=[device],
        context=[OH_DEVICE_CONTEXT],
        eval_accuracy=results["eval_accuracy"],
    )

11 changes: 5 additions & 6 deletions tests/test_fsdp_examples.py
@@ -8,9 +8,10 @@
import pytest

from .test_examples import ACCURACY_PERF_FACTOR, TIME_PERF_FACTOR
from .utils import OH_DEVICE_CONTEXT


if os.environ.get("GAUDI2_CI", "0") == "1":
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    # Gaudi2 CI baselines
    MODELS_TO_TEST = {
        "bf16": [
@@ -145,24 +146,22 @@ def _test_fsdp(
    with open(Path(tmp_dir) / "all_results.json") as fp:
        results = json.load(fp)

    device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1"

    # Ensure performance requirements (throughput) are met
    baseline.assertRef(
        compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref,
        context=[device],
        context=[OH_DEVICE_CONTEXT],
        train_samples_per_second=results["train_samples_per_second"],
    )
    if model_name == "bert-base-uncased":
        baseline.assertRef(
            compare=lambda actual, ref: actual >= ACCURACY_PERF_FACTOR * ref,
            context=[device],
            context=[OH_DEVICE_CONTEXT],
            eval_f1=results["eval_f1"],
        )
    else:
        baseline.assertRef(
            compare=lambda actual, ref: actual <= (2 - ACCURACY_PERF_FACTOR) * ref,
            context=[device],
            context=[OH_DEVICE_CONTEXT],
            train_loss=results["train_loss"],
        )

4 changes: 3 additions & 1 deletion tests/test_functional_text_generation_example.py
@@ -9,8 +9,10 @@

from optimum.habana.utils import set_seed

from .utils import OH_DEVICE_CONTEXT

if os.environ.get("GAUDI2_CI", "0") == "1":

if OH_DEVICE_CONTEXT in ["gaudi2"]:
    MODEL_OUTPUTS = {
        "bigcode/starcoder": 'def print_hello_world():\n print("Hello World")\n\ndef print_hello_world_twice():\n print_hello_world()\n print_hello_world()\n\ndef print_hello_world_thrice():\n print_hello_world()\n print_hello_world()\n print_hello_world()\n\ndef print_hello_world_four_times():\n print_hello_world()\n print_hello_world()\n print_hello_world()\n ',
        "bigcode/starcoder2-3b": 'def print_hello_world():\n print("Hello World")\n\ndef print_hello_world_with_name(name):\n print("Hello World, " + name)\n\ndef print_hello_world_with_name_and_age(name, age):\n print("Hello World, " + name + ", " + str(age))\n\ndef print_hello_world_with_name_and_age_and_gender(name, age, gender):\n print("Hello',
7 changes: 3 additions & 4 deletions tests/test_image_to_text_example.py
@@ -8,9 +8,10 @@
import pytest

from .test_examples import TIME_PERF_FACTOR
from .utils import OH_DEVICE_CONTEXT


if os.environ.get("GAUDI2_CI", "0") == "1":
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    # Gaudi2 CI baselines
    MODELS_TO_TEST = {
        "bf16": [
@@ -119,12 +120,10 @@ def _test_image_to_text(
    with open(Path(tmp_dir) / "results.json") as fp:
        results = json.load(fp)

    device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1"

    # Ensure performance requirements (throughput) are met
    baseline.assertRef(
        compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref,
        context=[device],
        context=[OH_DEVICE_CONTEXT],
        throughput=results["throughput"],
    )

3 changes: 2 additions & 1 deletion tests/test_object_detection.py
@@ -27,11 +27,12 @@
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi

from .test_examples import TIME_PERF_FACTOR
from .utils import OH_DEVICE_CONTEXT


adapt_transformers_to_gaudi()

if os.environ.get("GAUDI2_CI", "0") == "1":
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    # Gaudi2 CI baselines
    LATENCY_DETR_BF16_GRAPH_BASELINE = 7.0
else:
5 changes: 2 additions & 3 deletions tests/test_openclip_vqa.py
@@ -8,6 +8,7 @@
import pytest

from .test_examples import TIME_PERF_FACTOR
from .utils import OH_DEVICE_CONTEXT


MODELS_TO_TEST = {
@@ -62,12 +63,10 @@ def _test_openclip_vqa(model_name: str, baseline):
    with open(Path(tmp_dir) / "results.json") as fp:
        results = json.load(fp)

    device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1"

    # Ensure performance requirements (throughput) are met
    baseline.assertRef(
        compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref,
        context=[device],
        context=[OH_DEVICE_CONTEXT],
        throughput=results["throughput"],
    )

(7 more changed files not shown)
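
The touched test files all consume the context the same way; a condensed sketch of that recurring pattern (the baseline fixture and assertRef call mirror the diffs above, while the metric name and reference values are hypothetical):

import pytest

from .utils import OH_DEVICE_CONTEXT

# Device-specific constants keyed off the shared context string
if OH_DEVICE_CONTEXT in ["gaudi2"]:
    THROUGHPUT_BASELINE = 1.0  # hypothetical Gaudi2 reference
else:
    THROUGHPUT_BASELINE = 0.5  # hypothetical Gaudi1 reference


@pytest.mark.skipif(OH_DEVICE_CONTEXT == "gaudi1", reason="does not fit into Gaudi1 memory")
def test_throughput(baseline):
    results = {"throughput": 1.1}  # stand-in for a parsed results.json
    # Reference values are looked up per device context, so one baseline
    # file can serve gaudi1, gaudi2, and any future device
    baseline.assertRef(
        compare=lambda actual, ref: actual >= ref,
        context=[OH_DEVICE_CONTEXT],
        throughput=results["throughput"],
    )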
