Enable PyTorch 2.6 (#8309)

Partially addresses #8303.

### Description

This changes the maximum NumPy version to be below 3.0 so that testing covers NumPy 2.x compatibility. The earlier incompatibility with NumPy 2.x appears to be resolved with newer versions of dependencies. This also includes fixes for PyTorch 2.6, mostly relating to `torch.load` and `autocast` usage.

### Types of changes

- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing functionality to change).
- [ ] New tests added to cover the changes.
- [ ] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`.
- [ ] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`.
- [ ] In-line docstrings updated.
- [ ] Documentation updated, tested `make html` command in the `docs/` folder.

---------

Signed-off-by: Eric Kerfoot <[email protected]>
Signed-off-by: Eric Kerfoot <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Project-MONAI · Mar 8, 2025 · 7c26e5a · 7c26e5a
1 parent 1983f27
commit 7c26e5a
Show file tree

Hide file tree

Showing 49 changed files with 184 additions and 178 deletions.
diff --git a/monai/apps/deepedit/interaction.py b/monai/apps/deepedit/interaction.py
@@ -72,7 +72,7 @@ def __call__(self, engine: SupervisedTrainer | SupervisedEvaluator, batchdata: d
 
                 with torch.no_grad():
                     if engine.amp:
-                        with torch.cuda.amp.autocast():
+                        with torch.autocast("cuda"):
                             predictions = engine.inferer(inputs, engine.network)
                     else:
                         predictions = engine.inferer(inputs, engine.network)

diff --git a/monai/apps/deepgrow/interaction.py b/monai/apps/deepgrow/interaction.py
@@ -67,7 +67,7 @@ def __call__(self, engine: SupervisedTrainer | SupervisedEvaluator, batchdata: d
             engine.network.eval()
             with torch.no_grad():
                 if engine.amp:
-                    with torch.cuda.amp.autocast():
+                    with torch.autocast("cuda"):
                         predictions = engine.inferer(inputs, engine.network)
                 else:
                     predictions = engine.inferer(inputs, engine.network)

diff --git a/monai/apps/detection/networks/retinanet_detector.py b/monai/apps/detection/networks/retinanet_detector.py
@@ -180,7 +180,7 @@ def forward(self, images: torch.Tensor):
                 nesterov=True,
             )
             torch.save(detector.network.state_dict(), 'model.pt')  # save model
-            detector.network.load_state_dict(torch.load('model.pt'))  # load model
+            detector.network.load_state_dict(torch.load('model.pt', weights_only=True))  # load model
     """
 
     def __init__(

diff --git a/monai/apps/detection/networks/retinanet_network.py b/monai/apps/detection/networks/retinanet_network.py
@@ -88,8 +88,8 @@ def __init__(
 
         for layer in self.conv.children():
             if isinstance(layer, conv_type):  # type: ignore
-                torch.nn.init.normal_(layer.weight, std=0.01)
-                torch.nn.init.constant_(layer.bias, 0)
+                torch.nn.init.normal_(layer.weight, std=0.01)  # type: ignore[arg-type]
+                torch.nn.init.constant_(layer.bias, 0)  # type: ignore[arg-type]
 
         self.cls_logits = conv_type(in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
         torch.nn.init.normal_(self.cls_logits.weight, std=0.01)
@@ -167,8 +167,8 @@ def __init__(self, in_channels: int, num_anchors: int, spatial_dims: int):
 
         for layer in self.conv.children():
             if isinstance(layer, conv_type):  # type: ignore
-                torch.nn.init.normal_(layer.weight, std=0.01)
-                torch.nn.init.zeros_(layer.bias)
+                torch.nn.init.normal_(layer.weight, std=0.01)  # type: ignore[arg-type]
+                torch.nn.init.zeros_(layer.bias)  # type: ignore[arg-type]
 
     def forward(self, x: list[Tensor]) -> list[Tensor]:
         """
@@ -297,7 +297,7 @@ def __init__(
             )
         self.feature_extractor = feature_extractor
 
-        self.feature_map_channels: int = self.feature_extractor.out_channels
+        self.feature_map_channels: int = self.feature_extractor.out_channels  # type: ignore[assignment]
         self.num_anchors = num_anchors
         self.classification_head = RetinaNetClassificationHead(
             self.feature_map_channels, self.num_anchors, self.num_classes, spatial_dims=self.spatial_dims

diff --git a/monai/apps/detection/utils/box_coder.py b/monai/apps/detection/utils/box_coder.py
@@ -221,15 +221,15 @@ def decode_single(self, rel_codes: Tensor, reference_boxes: Tensor) -> Tensor:
 
             pred_ctr_xyx_axis = dxyz_axis * whd_axis[:, None] + ctr_xyz_axis[:, None]
             pred_whd_axis = torch.exp(dwhd_axis) * whd_axis[:, None]
-            pred_whd_axis = pred_whd_axis.to(dxyz_axis.dtype)
+            pred_whd_axis = pred_whd_axis.to(dxyz_axis.dtype)  # type: ignore[union-attr]
 
             # When convert float32 to float16, Inf or Nan may occur
             if torch.isnan(pred_whd_axis).any() or torch.isinf(pred_whd_axis).any():
                 raise ValueError("pred_whd_axis is NaN or Inf.")
 
             # Distance from center to box's corner.
             c_to_c_whd_axis = (
-                torch.tensor(0.5, dtype=pred_ctr_xyx_axis.dtype, device=pred_whd_axis.device) * pred_whd_axis
+                torch.tensor(0.5, dtype=pred_ctr_xyx_axis.dtype, device=pred_whd_axis.device) * pred_whd_axis  # type: ignore[arg-type]
             )
 
             pred_boxes.append(pred_ctr_xyx_axis - c_to_c_whd_axis)

diff --git a/monai/apps/mmars/mmars.py b/monai/apps/mmars/mmars.py
@@ -241,7 +241,7 @@ def load_from_mmar(
         return torch.jit.load(_model_file, map_location=map_location)
 
     # loading with `torch.load`
-    model_dict = torch.load(_model_file, map_location=map_location)
+    model_dict = torch.load(_model_file, map_location=map_location, weights_only=True)
     if weights_only:
         return model_dict.get(model_key, model_dict)  # model_dict[model_key] or model_dict directly
 

diff --git a/monai/apps/reconstruction/networks/blocks/varnetblock.py b/monai/apps/reconstruction/networks/blocks/varnetblock.py
@@ -55,7 +55,7 @@ def soft_dc(self, x: Tensor, ref_kspace: Tensor, mask: Tensor) -> Tensor:
         Returns:
             Output of DC block with the same shape as x
         """
-        return torch.where(mask, x - ref_kspace, self.zeros) * self.dc_weight
+        return torch.where(mask, x - ref_kspace, self.zeros) * self.dc_weight  # type: ignore
 
     def forward(self, current_kspace: Tensor, ref_kspace: Tensor, mask: Tensor, sens_maps: Tensor) -> Tensor:
         """

diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py
@@ -760,7 +760,7 @@ def load(
     if load_ts_module is True:
         return load_net_with_metadata(full_path, map_location=torch.device(device), more_extra_files=config_files)
     # loading with `torch.load`
-    model_dict = torch.load(full_path, map_location=torch.device(device))
+    model_dict = torch.load(full_path, map_location=torch.device(device), weights_only=True)
 
     if not isinstance(model_dict, Mapping):
         warnings.warn(f"the state dictionary from {full_path} should be a dictionary but got {type(model_dict)}.")
@@ -1279,9 +1279,8 @@ def verify_net_in_out(
         if input_dtype == torch.float16:
             # fp16 can only be executed in gpu mode
             net.to("cuda")
-            from torch.cuda.amp import autocast
 
-            with autocast():
+            with torch.autocast("cuda"):
                 output = net(test_data.cuda(), **extra_forward_args_)
             net.to(device_)
         else:
@@ -1330,7 +1329,7 @@ def _export(
         # here we use ignite Checkpoint to support nested weights and be compatible with MONAI CheckpointSaver
         Checkpoint.load_objects(to_load={key_in_ckpt: net}, checkpoint=ckpt_file)
     else:
-        ckpt = torch.load(ckpt_file)
+        ckpt = torch.load(ckpt_file, weights_only=True)
         copy_model_state(dst=net, src=ckpt if key_in_ckpt == "" else ckpt[key_in_ckpt])
 
     # Use the given converter to convert a model and save with metadata, config content

diff --git a/monai/data/dataset.py b/monai/data/dataset.py
@@ -22,7 +22,6 @@
 import warnings
 from collections.abc import Callable, Sequence
 from copy import copy, deepcopy
-from inspect import signature
 from multiprocessing.managers import ListProxy
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
@@ -372,10 +371,7 @@ def _cachecheck(self, item_transformed):
 
         if hashfile is not None and hashfile.is_file():  # cache hit
             try:
-                if "weights_only" in signature(torch.load).parameters:
-                    return torch.load(hashfile, weights_only=False)
-                else:
-                    return torch.load(hashfile)
+                return torch.load(hashfile, weights_only=False)
             except PermissionError as e:
                 if sys.platform != "win32":
                     raise e
@@ -1674,7 +1670,4 @@ def _load_meta_cache(self, meta_hash_file_name):
         if meta_hash_file_name in self._meta_cache:
             return self._meta_cache[meta_hash_file_name]
         else:
-            if "weights_only" in signature(torch.load).parameters:
-                return torch.load(self.cache_dir / meta_hash_file_name, weights_only=False)
-            else:
-                return torch.load(self.cache_dir / meta_hash_file_name)
+            return torch.load(self.cache_dir / meta_hash_file_name, weights_only=False)
diff --git a/monai/data/utils.py b/monai/data/utils.py
@@ -753,7 +753,7 @@ def affine_to_spacing(affine: NdarrayTensor, r: int = 3, dtype=float, suppress_z
     if isinstance(_affine, torch.Tensor):
         spacing = torch.sqrt(torch.sum(_affine * _affine, dim=0))
     else:
-        spacing = np.sqrt(np.sum(_affine * _affine, axis=0))
+        spacing = np.sqrt(np.sum(_affine * _affine, axis=0))  # type: ignore[operator]
     if suppress_zeros:
         spacing[spacing == 0] = 1.0
     spacing_, *_ = convert_to_dst_type(spacing, dst=affine, dtype=dtype)

diff --git a/monai/data/video_dataset.py b/monai/data/video_dataset.py
@@ -177,7 +177,7 @@ def get_available_codecs() -> dict[str, str]:
                 for codec, ext in all_codecs.items():
                     writer = cv2.VideoWriter()
                     fname = os.path.join(tmp_dir, f"test{ext}")
-                    fourcc = cv2.VideoWriter_fourcc(*codec)
+                    fourcc = cv2.VideoWriter_fourcc(*codec)  # type: ignore[attr-defined]
                     noviderr = writer.open(fname, fourcc, 1, (10, 10))
                     if noviderr:
                         codecs[codec] = ext

diff --git a/monai/engines/evaluator.py b/monai/engines/evaluator.py
@@ -82,8 +82,8 @@ class Evaluator(Workflow):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     """
 
@@ -214,8 +214,8 @@ class SupervisedEvaluator(Evaluator):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
         compile: whether to use `torch.compile`, default is False. If True, MetaTensor inputs will be converted to
             `torch.Tensor` before forward pass,  then converted back afterward with copied meta information.
         compile_kwargs: dict of the args for `torch.compile()` API, for more details:
@@ -324,7 +324,7 @@ def _iteration(self, engine: SupervisedEvaluator, batchdata: dict[str, torch.Ten
         # execute forward computation
         with engine.mode(engine.network):
             if engine.amp:
-                with torch.cuda.amp.autocast(**engine.amp_kwargs):
+                with torch.autocast("cuda", **engine.amp_kwargs):
                     engine.state.output[Keys.PRED] = engine.inferer(inputs, engine.network, *args, **kwargs)
             else:
                 engine.state.output[Keys.PRED] = engine.inferer(inputs, engine.network, *args, **kwargs)
@@ -394,8 +394,8 @@ class EnsembleEvaluator(Evaluator):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     """
 
@@ -487,7 +487,7 @@ def _iteration(self, engine: EnsembleEvaluator, batchdata: dict[str, torch.Tenso
         for idx, network in enumerate(engine.networks):
             with engine.mode(network):
                 if engine.amp:
-                    with torch.cuda.amp.autocast(**engine.amp_kwargs):
+                    with torch.autocast("cuda", **engine.amp_kwargs):
                         if isinstance(engine.state.output, dict):
                             engine.state.output.update(
                                 {engine.pred_keys[idx]: engine.inferer(inputs, network, *args, **kwargs)}

diff --git a/monai/engines/trainer.py b/monai/engines/trainer.py
@@ -125,8 +125,8 @@ class SupervisedTrainer(Trainer):
             more details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
         compile: whether to use `torch.compile`, default is False. If True, MetaTensor inputs will be converted to
             `torch.Tensor` before forward pass,  then converted back afterward with copied meta information.
         compile_kwargs: dict of the args for `torch.compile()` API, for more details:
@@ -249,7 +249,7 @@ def _compute_pred_loss():
         engine.optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.scaler is not None:
-            with torch.cuda.amp.autocast(**engine.amp_kwargs):
+            with torch.autocast("cuda", **engine.amp_kwargs):
                 _compute_pred_loss()
             engine.scaler.scale(engine.state.output[Keys.LOSS]).backward()
             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
@@ -335,8 +335,8 @@ class GanTrainer(Trainer):
             more details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     """
 
@@ -512,8 +512,8 @@ class AdversarialTrainer(Trainer):
             more details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
     """
 
     def __init__(
@@ -683,7 +683,7 @@ def _compute_generator_loss() -> None:
         engine.state.g_optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.state.g_scaler is not None:
-            with torch.cuda.amp.autocast(**engine.amp_kwargs):
+            with torch.autocast("cuda", **engine.amp_kwargs):
                 _compute_generator_loss()
 
             engine.state.output[Keys.LOSS] = (
@@ -731,7 +731,7 @@ def _compute_discriminator_loss() -> None:
         engine.state.d_network.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.state.d_scaler is not None:
-            with torch.cuda.amp.autocast(**engine.amp_kwargs):
+            with torch.autocast("cuda", **engine.amp_kwargs):
                 _compute_discriminator_loss()
 
             engine.state.d_scaler.scale(engine.state.output[AdversarialKeys.DISCRIMINATOR_LOSS]).backward()

diff --git a/monai/engines/utils.py b/monai/engines/utils.py
@@ -309,7 +309,7 @@ def __init__(self, scheduler: nn.Module, num_train_timesteps: int, condition_nam
         self.scheduler = scheduler
 
     def get_target(self, images, noise, timesteps):
-        return self.scheduler.get_velocity(images, noise, timesteps)
+        return self.scheduler.get_velocity(images, noise, timesteps)  # type: ignore[operator]
 
 
 def default_make_latent(

diff --git a/monai/engines/workflow.py b/monai/engines/workflow.py
@@ -90,8 +90,8 @@ class Workflow(Engine):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     Raises:
         TypeError: When ``data_loader`` is not a ``torch.utils.data.DataLoader``.

diff --git a/monai/fl/client/monai_algo.py b/monai/fl/client/monai_algo.py
@@ -574,7 +574,7 @@ def get_weights(self, extra=None):
                 model_path = os.path.join(self.bundle_root, cast(str, self.model_filepaths[model_type]))
                 if not os.path.isfile(model_path):
                     raise ValueError(f"No best model checkpoint exists at {model_path}")
-                weights = torch.load(model_path, map_location="cpu")
+                weights = torch.load(model_path, map_location="cpu", weights_only=True)
                 # if weights contain several state dicts, use the one defined by `save_dict_key`
                 if isinstance(weights, dict) and self.save_dict_key in weights:
                     weights = weights.get(self.save_dict_key)

diff --git a/monai/handlers/checkpoint_loader.py b/monai/handlers/checkpoint_loader.py
@@ -122,7 +122,7 @@ def __call__(self, engine: Engine) -> None:
         Args:
             engine: Ignite Engine, it can be a trainer, validator or evaluator.
         """
-        checkpoint = torch.load(self.load_path, map_location=self.map_location)
+        checkpoint = torch.load(self.load_path, map_location=self.map_location, weights_only=False)
 
         k, _ = list(self.load_dict.items())[0]
         # single object and checkpoint is directly a state_dict