diff --git a/monai/apps/deepedit/interaction.py b/monai/apps/deepedit/interaction.py
index 07302575c6..33e50700ca 100644
--- a/monai/apps/deepedit/interaction.py
+++ b/monai/apps/deepedit/interaction.py
@@ -72,7 +72,7 @@ def __call__(self, engine: SupervisedTrainer | SupervisedEvaluator, batchdata: d
 
         with torch.no_grad():
             if engine.amp:
-                with torch.cuda.amp.autocast():
+                with torch.autocast("cuda"):
                     predictions = engine.inferer(inputs, engine.network)
             else:
                 predictions = engine.inferer(inputs, engine.network)
diff --git a/monai/apps/deepgrow/interaction.py b/monai/apps/deepgrow/interaction.py
index fa3a28bfef..287f2d607c 100644
--- a/monai/apps/deepgrow/interaction.py
+++ b/monai/apps/deepgrow/interaction.py
@@ -67,7 +67,7 @@ def __call__(self, engine: SupervisedTrainer | SupervisedEvaluator, batchdata: d
         engine.network.eval()
         with torch.no_grad():
             if engine.amp:
-                with torch.cuda.amp.autocast():
+                with torch.autocast("cuda"):
                     predictions = engine.inferer(inputs, engine.network)
             else:
                 predictions = engine.inferer(inputs, engine.network)
diff --git a/monai/apps/detection/networks/retinanet_detector.py b/monai/apps/detection/networks/retinanet_detector.py
index a0573d6cd1..e996ae81bc 100644
--- a/monai/apps/detection/networks/retinanet_detector.py
+++ b/monai/apps/detection/networks/retinanet_detector.py
@@ -180,7 +180,7 @@ def forward(self, images: torch.Tensor):
                 nesterov=True,
             )
             torch.save(detector.network.state_dict(), 'model.pt')  # save model
-            detector.network.load_state_dict(torch.load('model.pt'))  # load model
+            detector.network.load_state_dict(torch.load('model.pt', weights_only=True))  # load model
     """
 
     def __init__(
diff --git a/monai/apps/detection/networks/retinanet_network.py b/monai/apps/detection/networks/retinanet_network.py
index ca6a8f5c19..ead57d74c2 100644
--- a/monai/apps/detection/networks/retinanet_network.py
+++ b/monai/apps/detection/networks/retinanet_network.py
@@ -88,8 +88,8 @@ def __init__(
 
         for layer in self.conv.children():
             if isinstance(layer, conv_type):  # type: ignore
-                torch.nn.init.normal_(layer.weight, std=0.01)
-                torch.nn.init.constant_(layer.bias, 0)
+                torch.nn.init.normal_(layer.weight, std=0.01)  # type: ignore[arg-type]
+                torch.nn.init.constant_(layer.bias, 0)  # type: ignore[arg-type]
 
         self.cls_logits = conv_type(in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
         torch.nn.init.normal_(self.cls_logits.weight, std=0.01)
@@ -167,8 +167,8 @@ def __init__(self, in_channels: int, num_anchors: int, spatial_dims: int):
 
         for layer in self.conv.children():
             if isinstance(layer, conv_type):  # type: ignore
-                torch.nn.init.normal_(layer.weight, std=0.01)
-                torch.nn.init.zeros_(layer.bias)
+                torch.nn.init.normal_(layer.weight, std=0.01)  # type: ignore[arg-type]
+                torch.nn.init.zeros_(layer.bias)  # type: ignore[arg-type]
 
     def forward(self, x: list[Tensor]) -> list[Tensor]:
         """
@@ -297,7 +297,7 @@ def __init__(
         )
         self.feature_extractor = feature_extractor
 
-        self.feature_map_channels: int = self.feature_extractor.out_channels
+        self.feature_map_channels: int = self.feature_extractor.out_channels  # type: ignore[assignment]
         self.num_anchors = num_anchors
         self.classification_head = RetinaNetClassificationHead(
             self.feature_map_channels, self.num_anchors, self.num_classes, spatial_dims=self.spatial_dims
diff --git a/monai/apps/detection/utils/box_coder.py b/monai/apps/detection/utils/box_coder.py
index 504ae21d0f..d0f3adf71d 100644
--- a/monai/apps/detection/utils/box_coder.py
+++ b/monai/apps/detection/utils/box_coder.py
@@ -221,7 +221,7 @@ def decode_single(self, rel_codes: Tensor, reference_boxes: Tensor) -> Tensor:
 
             pred_ctr_xyx_axis = dxyz_axis * whd_axis[:, None] + ctr_xyz_axis[:, None]
             pred_whd_axis = torch.exp(dwhd_axis) * whd_axis[:, None]
-            pred_whd_axis = pred_whd_axis.to(dxyz_axis.dtype)
+            pred_whd_axis = pred_whd_axis.to(dxyz_axis.dtype)  # type: ignore[union-attr]
 
             # When convert float32 to float16, Inf or Nan may occur
             if torch.isnan(pred_whd_axis).any() or torch.isinf(pred_whd_axis).any():
@@ -229,7 +229,7 @@ def decode_single(self, rel_codes: Tensor, reference_boxes: Tensor) -> Tensor:
 
             # Distance from center to box's corner.
             c_to_c_whd_axis = (
-                torch.tensor(0.5, dtype=pred_ctr_xyx_axis.dtype, device=pred_whd_axis.device) * pred_whd_axis
+                torch.tensor(0.5, dtype=pred_ctr_xyx_axis.dtype, device=pred_whd_axis.device) * pred_whd_axis  # type: ignore[arg-type]
             )
 
             pred_boxes.append(pred_ctr_xyx_axis - c_to_c_whd_axis)
diff --git a/monai/apps/mmars/mmars.py b/monai/apps/mmars/mmars.py
index 31c88a17be..1fc0690cc9 100644
--- a/monai/apps/mmars/mmars.py
+++ b/monai/apps/mmars/mmars.py
@@ -241,7 +241,7 @@ def load_from_mmar(
         return torch.jit.load(_model_file, map_location=map_location)
 
     # loading with `torch.load`
-    model_dict = torch.load(_model_file, map_location=map_location)
+    model_dict = torch.load(_model_file, map_location=map_location, weights_only=True)
    if weights_only:
         return model_dict.get(model_key, model_dict)  # model_dict[model_key] or model_dict directly
diff --git a/monai/apps/reconstruction/networks/blocks/varnetblock.py b/monai/apps/reconstruction/networks/blocks/varnetblock.py
index 75dc7e15ce..289505a057 100644
--- a/monai/apps/reconstruction/networks/blocks/varnetblock.py
+++ b/monai/apps/reconstruction/networks/blocks/varnetblock.py
@@ -55,7 +55,7 @@ def soft_dc(self, x: Tensor, ref_kspace: Tensor, mask: Tensor) -> Tensor:
         Returns:
             Output of DC block with the same shape as x
         """
-        return torch.where(mask, x - ref_kspace, self.zeros) * self.dc_weight
+        return torch.where(mask, x - ref_kspace, self.zeros) * self.dc_weight  # type: ignore
 
     def forward(self, current_kspace: Tensor, ref_kspace: Tensor, mask: Tensor, sens_maps: Tensor) -> Tensor:
         """
diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py
index b43f7e0fa0..6f35179e96 100644
--- a/monai/bundle/scripts.py
+++ b/monai/bundle/scripts.py
@@ -760,7 +760,7 @@ def load(
     if load_ts_module is True:
         return load_net_with_metadata(full_path, map_location=torch.device(device), more_extra_files=config_files)
     # loading with `torch.load`
-    model_dict = torch.load(full_path, map_location=torch.device(device))
+    model_dict = torch.load(full_path, map_location=torch.device(device), weights_only=True)
     if not isinstance(model_dict, Mapping):
         warnings.warn(f"the state dictionary from {full_path} should be a dictionary but got {type(model_dict)}.")
@@ -1279,9 +1279,8 @@ def verify_net_in_out(
         if input_dtype == torch.float16:
             # fp16 can only be executed in gpu mode
             net.to("cuda")
-            from torch.cuda.amp import autocast
-
-            with autocast():
+            with torch.autocast("cuda"):
                 output = net(test_data.cuda(), **extra_forward_args_)
             net.to(device_)
         else:
@@ -1330,7 +1329,7 @@ def _export(
         # here we use ignite Checkpoint to support nested weights and be compatible with MONAI CheckpointSaver
         Checkpoint.load_objects(to_load={key_in_ckpt: net}, checkpoint=ckpt_file)
     else:
-        ckpt = torch.load(ckpt_file)
+        ckpt = torch.load(ckpt_file, weights_only=True)
         copy_model_state(dst=net, src=ckpt if key_in_ckpt == "" else ckpt[key_in_ckpt])
 
     # Use the given converter to convert a model and save with metadata, config content
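Note on the `weights_only=True` changes above: recent PyTorch releases default `torch.load` to restricted unpickling, and passing the flag explicitly keeps these call sites unambiguous across versions. A minimal sketch of the safe mode, with hypothetical file names:

import torch

net = torch.nn.Linear(4, 2)
torch.save(net.state_dict(), "model.pt")  # a plain dict of tensors

# weights_only=True limits unpickling to tensors and allowlisted types,
# so loading an untrusted file cannot execute arbitrary code
state = torch.load("model.pt", map_location="cpu", weights_only=True)
net.load_state_dict(state)

The call sites that keep `weights_only=False` in the hunks below (e.g. the persistent dataset cache) intentionally unpickle arbitrary Python objects and therefore must only ever read trusted files.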
diff --git a/monai/data/dataset.py b/monai/data/dataset.py
index 8c53338d66..691425994d 100644
--- a/monai/data/dataset.py
+++ b/monai/data/dataset.py
@@ -22,7 +22,6 @@
 import warnings
 from collections.abc import Callable, Sequence
 from copy import copy, deepcopy
-from inspect import signature
 from multiprocessing.managers import ListProxy
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
@@ -372,10 +371,7 @@ def _cachecheck(self, item_transformed):
         if hashfile is not None and hashfile.is_file():  # cache hit
             try:
-                if "weights_only" in signature(torch.load).parameters:
-                    return torch.load(hashfile, weights_only=False)
-                else:
-                    return torch.load(hashfile)
+                return torch.load(hashfile, weights_only=False)
             except PermissionError as e:
                 if sys.platform != "win32":
                     raise e
@@ -1674,7 +1670,4 @@ def _load_meta_cache(self, meta_hash_file_name):
         if meta_hash_file_name in self._meta_cache:
             return self._meta_cache[meta_hash_file_name]
         else:
-            if "weights_only" in signature(torch.load).parameters:
-                return torch.load(self.cache_dir / meta_hash_file_name, weights_only=False)
-            else:
-                return torch.load(self.cache_dir / meta_hash_file_name)
+            return torch.load(self.cache_dir / meta_hash_file_name, weights_only=False)
diff --git a/monai/data/utils.py b/monai/data/utils.py
index d03dbd3234..988b813272 100644
--- a/monai/data/utils.py
+++ b/monai/data/utils.py
@@ -753,7 +753,7 @@ def affine_to_spacing(affine: NdarrayTensor, r: int = 3, dtype=float, suppress_z
     if isinstance(_affine, torch.Tensor):
         spacing = torch.sqrt(torch.sum(_affine * _affine, dim=0))
     else:
-        spacing = np.sqrt(np.sum(_affine * _affine, axis=0))
+        spacing = np.sqrt(np.sum(_affine * _affine, axis=0))  # type: ignore[operator]
     if suppress_zeros:
         spacing[spacing == 0] = 1.0
     spacing_, *_ = convert_to_dst_type(spacing, dst=affine, dtype=dtype)
diff --git a/monai/data/video_dataset.py b/monai/data/video_dataset.py
index 031e85db26..9ff23ebeff 100644
--- a/monai/data/video_dataset.py
+++ b/monai/data/video_dataset.py
@@ -177,7 +177,7 @@ def get_available_codecs() -> dict[str, str]:
         for codec, ext in all_codecs.items():
             writer = cv2.VideoWriter()
             fname = os.path.join(tmp_dir, f"test{ext}")
-            fourcc = cv2.VideoWriter_fourcc(*codec)
+            fourcc = cv2.VideoWriter_fourcc(*codec)  # type: ignore[attr-defined]
             noviderr = writer.open(fname, fourcc, 1, (10, 10))
             if noviderr:
                 codecs[codec] = ext
diff --git a/monai/engines/evaluator.py b/monai/engines/evaluator.py
index 35d4928465..836b407ac5 100644
--- a/monai/engines/evaluator.py
+++ b/monai/engines/evaluator.py
@@ -82,8 +82,8 @@ class Evaluator(Workflow):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     """
@@ -214,8 +214,8 @@ class SupervisedEvaluator(Evaluator):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
         compile: whether to use `torch.compile`, default is False. If True, MetaTensor inputs will be converted to
             `torch.Tensor` before forward pass, then converted back afterward with copied meta information.
         compile_kwargs: dict of the args for `torch.compile()` API, for more details:
@@ -324,7 +324,7 @@ def _iteration(self, engine: SupervisedEvaluator, batchdata: dict[str, torch.Ten
         # execute forward computation
         with engine.mode(engine.network):
             if engine.amp:
-                with torch.cuda.amp.autocast(**engine.amp_kwargs):
+                with torch.autocast("cuda", **engine.amp_kwargs):
                     engine.state.output[Keys.PRED] = engine.inferer(inputs, engine.network, *args, **kwargs)
             else:
                 engine.state.output[Keys.PRED] = engine.inferer(inputs, engine.network, *args, **kwargs)
@@ -394,8 +394,8 @@ class EnsembleEvaluator(Evaluator):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     """
@@ -487,7 +487,7 @@ def _iteration(self, engine: EnsembleEvaluator, batchdata: dict[str, torch.Tenso
         for idx, network in enumerate(engine.networks):
             with engine.mode(network):
                 if engine.amp:
-                    with torch.cuda.amp.autocast(**engine.amp_kwargs):
+                    with torch.autocast("cuda", **engine.amp_kwargs):
                         if isinstance(engine.state.output, dict):
                             engine.state.output.update(
                                 {engine.pred_keys[idx]: engine.inferer(inputs, network, *args, **kwargs)}
diff --git a/monai/engines/trainer.py b/monai/engines/trainer.py
index fdb45fbab8..b69a5015bb 100644
--- a/monai/engines/trainer.py
+++ b/monai/engines/trainer.py
@@ -125,8 +125,8 @@ class SupervisedTrainer(Trainer):
             more details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
         compile: whether to use `torch.compile`, default is False. If True, MetaTensor inputs will be converted to
             `torch.Tensor` before forward pass, then converted back afterward with copied meta information.
         compile_kwargs: dict of the args for `torch.compile()` API, for more details:
@@ -249,7 +249,7 @@ def _compute_pred_loss():
         engine.optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.scaler is not None:
-            with torch.cuda.amp.autocast(**engine.amp_kwargs):
+            with torch.autocast("cuda", **engine.amp_kwargs):
                 _compute_pred_loss()
             engine.scaler.scale(engine.state.output[Keys.LOSS]).backward()
             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
@@ -335,8 +335,8 @@ class GanTrainer(Trainer):
             more details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     """
@@ -512,8 +512,8 @@ class AdversarialTrainer(Trainer):
             more details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
     """
 
     def __init__(
@@ -683,7 +683,7 @@ def _compute_generator_loss() -> None:
         engine.state.g_optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.state.g_scaler is not None:
-            with torch.cuda.amp.autocast(**engine.amp_kwargs):
+            with torch.autocast("cuda", **engine.amp_kwargs):
                 _compute_generator_loss()
 
             engine.state.output[Keys.LOSS] = (
@@ -731,7 +731,7 @@ def _compute_discriminator_loss() -> None:
         engine.state.d_network.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.state.d_scaler is not None:
-            with torch.cuda.amp.autocast(**engine.amp_kwargs):
+            with torch.autocast("cuda", **engine.amp_kwargs):
                 _compute_discriminator_loss()
 
             engine.state.d_scaler.scale(engine.state.output[AdversarialKeys.DISCRIMINATOR_LOSS]).backward()
diff --git a/monai/engines/utils.py b/monai/engines/utils.py
index 8e19a18601..9095f8d943 100644
--- a/monai/engines/utils.py
+++ b/monai/engines/utils.py
@@ -309,7 +309,7 @@ def __init__(self, scheduler: nn.Module, num_train_timesteps: int, condition_nam
         self.scheduler = scheduler
 
     def get_target(self, images, noise, timesteps):
-        return self.scheduler.get_velocity(images, noise, timesteps)
+        return self.scheduler.get_velocity(images, noise, timesteps)  # type: ignore[operator]
 
 
 def default_make_latent(
diff --git a/monai/engines/workflow.py b/monai/engines/workflow.py
index 0c36da6d3d..ecb0c4a070 100644
--- a/monai/engines/workflow.py
+++ b/monai/engines/workflow.py
@@ -90,8 +90,8 @@ class Workflow(Engine):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.autocast("cuda")` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.autocast.
 
     Raises:
         TypeError: When ``data_loader`` is not a ``torch.utils.data.DataLoader``.
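The `amp_kwargs` docstring updates above track the same deprecation as the code changes: `torch.cuda.amp.autocast(...)` emits a FutureWarning on recent PyTorch, and `torch.autocast` takes the device type as its first positional argument. A minimal sketch of the replacement (requires a CUDA build; the kwargs are illustrative):

import torch

net = torch.nn.Linear(8, 2).cuda()
x = torch.randn(1, 8, device="cuda")

# previously: with torch.cuda.amp.autocast(dtype=torch.float16): ...
with torch.autocast("cuda", dtype=torch.float16):
    y = net(x)  # matmuls run in float16 inside the context
assert y.dtype == torch.float16

Because `torch.autocast` accepts the same keyword arguments (`enabled`, `dtype`, `cache_enabled`), the engines can keep forwarding `**engine.amp_kwargs` unchanged.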
diff --git a/monai/fl/client/monai_algo.py b/monai/fl/client/monai_algo.py
index a3ac58c221..6e9a6fd1fe 100644
--- a/monai/fl/client/monai_algo.py
+++ b/monai/fl/client/monai_algo.py
@@ -574,7 +574,7 @@ def get_weights(self, extra=None):
                 model_path = os.path.join(self.bundle_root, cast(str, self.model_filepaths[model_type]))
                 if not os.path.isfile(model_path):
                     raise ValueError(f"No best model checkpoint exists at {model_path}")
-                weights = torch.load(model_path, map_location="cpu")
+                weights = torch.load(model_path, map_location="cpu", weights_only=True)
                 # if weights contain several state dicts, use the one defined by `save_dict_key`
                 if isinstance(weights, dict) and self.save_dict_key in weights:
                     weights = weights.get(self.save_dict_key)
diff --git a/monai/handlers/checkpoint_loader.py b/monai/handlers/checkpoint_loader.py
index f48968ecfd..16cb875d03 100644
--- a/monai/handlers/checkpoint_loader.py
+++ b/monai/handlers/checkpoint_loader.py
@@ -122,7 +122,7 @@ def __call__(self, engine: Engine) -> None:
         Args:
             engine: Ignite Engine, it can be a trainer, validator or evaluator.
         """
-        checkpoint = torch.load(self.load_path, map_location=self.map_location)
+        checkpoint = torch.load(self.load_path, map_location=self.map_location, weights_only=False)
 
         k, _ = list(self.load_dict.items())[0]
         # single object and checkpoint is directly a state_dict
diff --git a/monai/inferers/inferer.py b/monai/inferers/inferer.py
index 156677d992..df23b9aea0 100644
--- a/monai/inferers/inferer.py
+++ b/monai/inferers/inferer.py
@@ -882,7 +882,7 @@ def sample(
             )
 
             # 2. compute previous image: x_t -> x_t-1
-            image, _ = scheduler.step(model_output, t, image)
+            image, _ = scheduler.step(model_output, t, image)  # type: ignore[operator]
             if save_intermediates and t % intermediate_steps == 0:
                 intermediates.append(image)
         if save_intermediates:
@@ -986,8 +986,8 @@ def get_likelihood(
             predicted_mean = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * noisy_image
 
             # get the posterior mean and variance
-            posterior_mean = scheduler._get_mean(timestep=t, x_0=inputs, x_t=noisy_image)
-            posterior_variance = scheduler._get_variance(timestep=t, predicted_variance=predicted_variance)
+            posterior_mean = scheduler._get_mean(timestep=t, x_0=inputs, x_t=noisy_image)  # type: ignore[operator]
+            posterior_variance = scheduler._get_variance(timestep=t, predicted_variance=predicted_variance)  # type: ignore[operator]
 
             log_posterior_variance = torch.log(posterior_variance)
             log_predicted_variance = torch.log(predicted_variance) if predicted_variance else log_posterior_variance
@@ -1436,7 +1436,7 @@ def sample(  # type: ignore[override]
             )
 
             # 3. compute previous image: x_t -> x_t-1
-            image, _ = scheduler.step(model_output, t, image)
+            image, _ = scheduler.step(model_output, t, image)  # type: ignore[operator]
             if save_intermediates and t % intermediate_steps == 0:
                 intermediates.append(image)
         if save_intermediates:
@@ -1562,8 +1562,8 @@ def get_likelihood(  # type: ignore[override]
             predicted_mean = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * noisy_image
 
             # get the posterior mean and variance
-            posterior_mean = scheduler._get_mean(timestep=t, x_0=inputs, x_t=noisy_image)
-            posterior_variance = scheduler._get_variance(timestep=t, predicted_variance=predicted_variance)
+            posterior_mean = scheduler._get_mean(timestep=t, x_0=inputs, x_t=noisy_image)  # type: ignore[operator]
+            posterior_variance = scheduler._get_variance(timestep=t, predicted_variance=predicted_variance)  # type: ignore[operator]
 
             log_posterior_variance = torch.log(posterior_variance)
             log_predicted_variance = torch.log(predicted_variance) if predicted_variance else log_posterior_variance
diff --git a/monai/inferers/merger.py b/monai/inferers/merger.py
index 1344207e18..a1ab8e8a56 100644
--- a/monai/inferers/merger.py
+++ b/monai/inferers/merger.py
@@ -53,8 +53,11 @@ def __init__(
         cropped_shape: Sequence[int] | None = None,
         device: torch.device | str | None = None,
     ) -> None:
-        self.merged_shape = merged_shape
-        self.cropped_shape = self.merged_shape if cropped_shape is None else cropped_shape
+        if merged_shape is None:
+            raise ValueError("Argument `merged_shape` must be provided")
+
+        self.merged_shape: tuple[int, ...] = tuple(merged_shape)
+        self.cropped_shape: tuple[int, ...] = tuple(self.merged_shape if cropped_shape is None else cropped_shape)
         self.device = device
         self.is_finalized = False
 
@@ -231,9 +234,9 @@ def __init__(
         dtype: np.dtype | str = "float32",
         value_dtype: np.dtype | str = "float32",
         count_dtype: np.dtype | str = "uint8",
-        store: zarr.storage.Store | str = "merged.zarr",
-        value_store: zarr.storage.Store | str | None = None,
-        count_store: zarr.storage.Store | str | None = None,
+        store: zarr.storage.Store | str = "merged.zarr",  # type: ignore
+        value_store: zarr.storage.Store | str | None = None,  # type: ignore
+        count_store: zarr.storage.Store | str | None = None,  # type: ignore
         compressor: str | None = None,
         value_compressor: str | None = None,
         count_compressor: str | None = None,
@@ -251,18 +254,18 @@ def __init__(
         if version_geq(get_package_version("zarr"), "3.0.0"):
             if value_store is None:
                 self.tmpdir = TemporaryDirectory()
-                self.value_store = zarr.storage.LocalStore(self.tmpdir.name)
+                self.value_store = zarr.storage.LocalStore(self.tmpdir.name)  # type: ignore
             else:
-                self.value_store = value_store
+                self.value_store = value_store  # type: ignore
             if count_store is None:
                 self.tmpdir = TemporaryDirectory()
-                self.count_store = zarr.storage.LocalStore(self.tmpdir.name)
+                self.count_store = zarr.storage.LocalStore(self.tmpdir.name)  # type: ignore
            else:
-                self.count_store = count_store
+                self.count_store = count_store  # type: ignore
         else:
             self.tmpdir = None
-            self.value_store = zarr.storage.TempStore() if value_store is None else value_store
-            self.count_store = zarr.storage.TempStore() if count_store is None else count_store
+            self.value_store = zarr.storage.TempStore() if value_store is None else value_store  # type: ignore
+            self.count_store = zarr.storage.TempStore() if count_store is None else count_store  # type: ignore
         self.chunks = chunks
         self.compressor = compressor
         self.value_compressor = value_compressor
         self.count_compressor = count_compressor
@@ -314,7 +317,7 @@ def aggregate(self, values: torch.Tensor, location: Sequence[int]) -> None:
         map_slice = ensure_tuple_size(map_slice, values.ndim, pad_val=slice(None), pad_from_start=True)
         with self.lock:
             self.values[map_slice] += values.numpy()
-            self.counts[map_slice] += 1
+            self.counts[map_slice] += 1  # type: ignore[operator]
 
     def finalize(self) -> zarr.Array:
         """
@@ -332,7 +335,7 @@ def finalize(self) -> zarr.Array:
         if not self.is_finalized:
             # use chunks for division to fit into memory
             for chunk in iterate_over_chunks(self.values.chunks, self.values.cdata_shape):
-                self.output[chunk] = self.values[chunk] / self.counts[chunk]
+                self.output[chunk] = self.values[chunk] / self.counts[chunk]  # type: ignore[operator]
             # finalize the shape
             self.output.resize(self.cropped_shape)
             # set finalize flag to protect performing in-place division again
diff --git a/monai/losses/perceptual.py b/monai/losses/perceptual.py
index a8ae90993a..ee653fac9d 100644
--- a/monai/losses/perceptual.py
+++ b/monai/losses/perceptual.py
@@ -374,7 +374,7 @@ def __init__(
         else:
             network = torchvision.models.resnet50(weights=None)
         if pretrained is True:
-            state_dict = torch.load(pretrained_path)
+            state_dict = torch.load(pretrained_path, weights_only=True)
             if pretrained_state_dict_key is not None:
                 state_dict = state_dict[pretrained_state_dict_key]
             network.load_state_dict(state_dict)
diff --git a/monai/losses/sure_loss.py b/monai/losses/sure_loss.py
index ebf25613a6..fa8820885d 100644
--- a/monai/losses/sure_loss.py
+++ b/monai/losses/sure_loss.py
@@ -92,7 +92,7 @@ def sure_loss_function(
     y_ref = operator(x)
 
     # get perturbed output
-    x_perturbed = x + eps * perturb_noise
+    x_perturbed = x + eps * perturb_noise  # type: ignore
     y_perturbed = operator(x_perturbed)
     # divergence
     divergence = torch.sum(1.0 / eps * torch.matmul(perturb_noise.permute(0, 1, 3, 2), y_perturbed - y_ref))  # type: ignore
diff --git a/monai/networks/blocks/feature_pyramid_network.py b/monai/networks/blocks/feature_pyramid_network.py
index 7de899803c..759a4efe0d 100644
--- a/monai/networks/blocks/feature_pyramid_network.py
+++ b/monai/networks/blocks/feature_pyramid_network.py
@@ -54,7 +54,9 @@
 
 from collections import OrderedDict
 from collections.abc import Callable
+from typing import cast
 
+import torch
 import torch.nn.functional as F
 from torch import Tensor, nn
 
@@ -194,8 +196,8 @@ def __init__(
         conv_type_: type[nn.Module] = Conv[Conv.CONV, spatial_dims]
         for m in self.modules():
             if isinstance(m, conv_type_):
-                nn.init.kaiming_uniform_(m.weight, a=1)
-                nn.init.constant_(m.bias, 0.0)
+                nn.init.kaiming_uniform_(cast(torch.Tensor, m.weight), a=1)
+                nn.init.constant_(cast(torch.Tensor, m.bias), 0.0)
 
         if extra_blocks is not None:
             if not isinstance(extra_blocks, ExtraFPNBlock):
diff --git a/monai/networks/layers/vector_quantizer.py b/monai/networks/layers/vector_quantizer.py
index 9c354e1009..0ff7143b69 100644
--- a/monai/networks/layers/vector_quantizer.py
+++ b/monai/networks/layers/vector_quantizer.py
@@ -100,7 +100,7 @@ def quantize(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, to
             torch.Tensor: Quantization indices of shape [B,H,W,D,1]
 
         """
-        with torch.cuda.amp.autocast(enabled=False):
+        with torch.autocast("cuda", enabled=False):
             encoding_indices_view = list(inputs.shape)
             del encoding_indices_view[1]
 
@@ -138,7 +138,7 @@ def embed(self, embedding_indices: torch.Tensor) -> torch.Tensor:
         Returns:
             torch.Tensor: Quantize space representation of encoding_indices in channel first format.
         """
-        with torch.cuda.amp.autocast(enabled=False):
+        with torch.autocast("cuda", enabled=False):
             embedding: torch.Tensor = (
                 self.embedding(embedding_indices).permute(self.quantization_permutation).contiguous()
             )
diff --git a/monai/networks/nets/hovernet.py b/monai/networks/nets/hovernet.py
index 3745b66bb5..b773af91d4 100644
--- a/monai/networks/nets/hovernet.py
+++ b/monai/networks/nets/hovernet.py
@@ -633,9 +633,9 @@ def _remap_preact_resnet_model(model_url: str):
     # download the pretrained weights into torch hub's default dir
     weights_dir = os.path.join(torch.hub.get_dir(), "preact-resnet50.pth")
     download_url(model_url, fuzzy=True, filepath=weights_dir, progress=False)
-    state_dict = torch.load(weights_dir, map_location=None if torch.cuda.is_available() else torch.device("cpu"))[
-        "desc"
-    ]
+    map_location = None if torch.cuda.is_available() else torch.device("cpu")
+    state_dict = torch.load(weights_dir, map_location=map_location, weights_only=True)["desc"]
+
     for key in list(state_dict.keys()):
         new_key = None
         if pattern_conv0.match(key):
@@ -668,7 +668,8 @@ def _remap_standard_resnet_model(model_url: str, state_dict_key: str | None = No
     # download the pretrained weights into torch hub's default dir
     weights_dir = os.path.join(torch.hub.get_dir(), "resnet50.pth")
     download_url(model_url, fuzzy=True, filepath=weights_dir, progress=False)
-    state_dict = torch.load(weights_dir, map_location=None if torch.cuda.is_available() else torch.device("cpu"))
+    map_location = None if torch.cuda.is_available() else torch.device("cpu")
+    state_dict = torch.load(weights_dir, map_location=map_location, weights_only=True)
     if state_dict_key is not None:
         state_dict = state_dict[state_dict_key]
diff --git a/monai/networks/nets/resnet.py b/monai/networks/nets/resnet.py
index d62722478e..d24b86d27d 100644
--- a/monai/networks/nets/resnet.py
+++ b/monai/networks/nets/resnet.py
@@ -493,7 +493,7 @@ def _resnet(
     if isinstance(pretrained, str):
         if Path(pretrained).exists():
             logger.info(f"Loading weights from {pretrained}...")
-            model_state_dict = torch.load(pretrained, map_location=device)
+            model_state_dict = torch.load(pretrained, map_location=device, weights_only=True)
        else:
             # Throw error
             raise FileNotFoundError("The pretrained checkpoint file is not found")
@@ -665,7 +665,7 @@ def get_pretrained_resnet_medicalnet(resnet_depth: int, device: str = "cpu", dat
                 raise EntryNotFoundError(
                     f"{filename} not found on {medicalnet_huggingface_repo_basename}{resnet_depth}"
                 ) from None
-            checkpoint = torch.load(pretrained_path, map_location=torch.device(device))
+            checkpoint = torch.load(pretrained_path, map_location=torch.device(device), weights_only=True)
         else:
             raise NotImplementedError("Supported resnet_depth are: [10, 18, 34, 50, 101, 152, 200]")
         logger.info(f"{filename} downloaded")
diff --git a/monai/networks/nets/senet.py b/monai/networks/nets/senet.py
index 51435a9ea2..c14118ad20 100644
--- a/monai/networks/nets/senet.py
+++ b/monai/networks/nets/senet.py
@@ -302,7 +302,7 @@ def _load_state_dict(model: nn.Module, arch: str, progress: bool):
     if isinstance(model_url, dict):
         download_url(model_url["url"], filepath=model_url["filename"])
-        state_dict = torch.load(model_url["filename"], map_location=None)
+        state_dict = torch.load(model_url["filename"], map_location=None, weights_only=True)
     else:
         state_dict = load_state_dict_from_url(model_url, progress=progress)
     for key in list(state_dict.keys()):
diff --git a/monai/networks/nets/swin_unetr.py b/monai/networks/nets/swin_unetr.py
index cfc5dda41f..22e1e6f659 100644
--- a/monai/networks/nets/swin_unetr.py
+++ b/monai/networks/nets/swin_unetr.py
@@ -272,53 +272,50 @@ def __init__(
 
         self.out = UnetOutBlock(spatial_dims=spatial_dims, in_channels=feature_size, out_channels=out_channels)
 
     def load_from(self, weights):
+        layers1_0: BasicLayer = self.swinViT.layers1[0]  # type: ignore[assignment]
+        layers2_0: BasicLayer = self.swinViT.layers2[0]  # type: ignore[assignment]
+        layers3_0: BasicLayer = self.swinViT.layers3[0]  # type: ignore[assignment]
+        layers4_0: BasicLayer = self.swinViT.layers4[0]  # type: ignore[assignment]
+        wstate = weights["state_dict"]
+
         with torch.no_grad():
-            self.swinViT.patch_embed.proj.weight.copy_(weights["state_dict"]["module.patch_embed.proj.weight"])
-            self.swinViT.patch_embed.proj.bias.copy_(weights["state_dict"]["module.patch_embed.proj.bias"])
-            for bname, block in self.swinViT.layers1[0].blocks.named_children():
-                block.load_from(weights, n_block=bname, layer="layers1")
-            self.swinViT.layers1[0].downsample.reduction.weight.copy_(
-                weights["state_dict"]["module.layers1.0.downsample.reduction.weight"]
-            )
-            self.swinViT.layers1[0].downsample.norm.weight.copy_(
-                weights["state_dict"]["module.layers1.0.downsample.norm.weight"]
-            )
-            self.swinViT.layers1[0].downsample.norm.bias.copy_(
-                weights["state_dict"]["module.layers1.0.downsample.norm.bias"]
-            )
-            for bname, block in self.swinViT.layers2[0].blocks.named_children():
-                block.load_from(weights, n_block=bname, layer="layers2")
-            self.swinViT.layers2[0].downsample.reduction.weight.copy_(
-                weights["state_dict"]["module.layers2.0.downsample.reduction.weight"]
-            )
-            self.swinViT.layers2[0].downsample.norm.weight.copy_(
-                weights["state_dict"]["module.layers2.0.downsample.norm.weight"]
-            )
-            self.swinViT.layers2[0].downsample.norm.bias.copy_(
-                weights["state_dict"]["module.layers2.0.downsample.norm.bias"]
-            )
-            for bname, block in self.swinViT.layers3[0].blocks.named_children():
-                block.load_from(weights, n_block=bname, layer="layers3")
-            self.swinViT.layers3[0].downsample.reduction.weight.copy_(
-                weights["state_dict"]["module.layers3.0.downsample.reduction.weight"]
-            )
-            self.swinViT.layers3[0].downsample.norm.weight.copy_(
-                weights["state_dict"]["module.layers3.0.downsample.norm.weight"]
-            )
-            self.swinViT.layers3[0].downsample.norm.bias.copy_(
-                weights["state_dict"]["module.layers3.0.downsample.norm.bias"]
-            )
-            for bname, block in self.swinViT.layers4[0].blocks.named_children():
-                block.load_from(weights, n_block=bname, layer="layers4")
-            self.swinViT.layers4[0].downsample.reduction.weight.copy_(
-                weights["state_dict"]["module.layers4.0.downsample.reduction.weight"]
-            )
-            self.swinViT.layers4[0].downsample.norm.weight.copy_(
-                weights["state_dict"]["module.layers4.0.downsample.norm.weight"]
-            )
-            self.swinViT.layers4[0].downsample.norm.bias.copy_(
-                weights["state_dict"]["module.layers4.0.downsample.norm.bias"]
-            )
+            self.swinViT.patch_embed.proj.weight.copy_(wstate["module.patch_embed.proj.weight"])
+            self.swinViT.patch_embed.proj.bias.copy_(wstate["module.patch_embed.proj.bias"])
+            for bname, block in layers1_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers1")  # type: ignore[operator]
+
+            if layers1_0.downsample is not None:
+                d = layers1_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers1.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers1.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers1.0.downsample.norm.bias"])  # type: ignore
+
+            for bname, block in layers2_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers2")  # type: ignore[operator]
+
+            if layers2_0.downsample is not None:
+                d = layers2_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers2.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers2.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers2.0.downsample.norm.bias"])  # type: ignore
+
+            for bname, block in layers3_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers3")  # type: ignore[operator]
+
+            if layers3_0.downsample is not None:
+                d = layers3_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers3.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers3.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers3.0.downsample.norm.bias"])  # type: ignore
+
+            for bname, block in layers4_0.blocks.named_children():
+                block.load_from(weights, n_block=bname, layer="layers4")  # type: ignore[operator]
+
+            if layers4_0.downsample is not None:
+                d = layers4_0.downsample
+                d.reduction.weight.copy_(wstate["module.layers4.0.downsample.reduction.weight"])  # type: ignore
+                d.norm.weight.copy_(wstate["module.layers4.0.downsample.norm.weight"])  # type: ignore
+                d.norm.bias.copy_(wstate["module.layers4.0.downsample.norm.bias"])  # type: ignore
 
     @torch.jit.unused
     def _check_input_size(self, spatial_shape):
@@ -532,7 +529,7 @@ def forward(self, x, mask):
         q = q * self.scale
         attn = q @ k.transpose(-2, -1)
         relative_position_bias = self.relative_position_bias_table[
-            self.relative_position_index.clone()[:n, :n].reshape(-1)
+            self.relative_position_index.clone()[:n, :n].reshape(-1)  # type: ignore[operator]
         ].reshape(n, n, -1)
         relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()
         attn = attn + relative_position_bias.unsqueeze(0)
@@ -691,7 +688,7 @@ def load_from(self, weights, n_block, layer):
             self.norm1.weight.copy_(weights["state_dict"][root + block_names[0]])
             self.norm1.bias.copy_(weights["state_dict"][root + block_names[1]])
             self.attn.relative_position_bias_table.copy_(weights["state_dict"][root + block_names[2]])
-            self.attn.relative_position_index.copy_(weights["state_dict"][root + block_names[3]])
+            self.attn.relative_position_index.copy_(weights["state_dict"][root + block_names[3]])  # type: ignore[operator]
             self.attn.qkv.weight.copy_(weights["state_dict"][root + block_names[4]])
             self.attn.qkv.bias.copy_(weights["state_dict"][root + block_names[5]])
             self.attn.proj.weight.copy_(weights["state_dict"][root + block_names[6]])
@@ -1118,7 +1115,7 @@ def filter_swinunetr(key, value):
             )
             ssl_weights_path = "./ssl_pretrained_weights.pth"
             download_url(resource, ssl_weights_path)
-            ssl_weights = torch.load(ssl_weights_path)["model"]
+            ssl_weights = torch.load(ssl_weights_path, weights_only=True)["model"]
 
             dst_dict, loaded, not_loaded = copy_model_state(model, ssl_weights, filter_func=filter_swinunetr)
diff --git a/monai/networks/nets/transchex.py b/monai/networks/nets/transchex.py
index 6bfff3c956..bd756ec214 100644
--- a/monai/networks/nets/transchex.py
+++ b/monai/networks/nets/transchex.py
@@ -43,7 +43,7 @@ def __init__(self, *inputs, **kwargs) -> None:
 
     def init_bert_weights(self, module):
         if isinstance(module, (nn.Linear, nn.Embedding)):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)  # type: ignore[union-attr,arg-type]
         elif isinstance(module, torch.nn.LayerNorm):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)
@@ -68,7 +68,8 @@ def from_pretrained(
         weights_path = cached_file(path_or_repo_id, filename, cache_dir=cache_dir)
         model = cls(num_language_layers, num_vision_layers, num_mixed_layers, bert_config, *inputs, **kwargs)
         if state_dict is None and not from_tf:
-            state_dict = torch.load(weights_path, map_location="cpu" if not torch.cuda.is_available() else None)
+            map_location = "cpu" if not torch.cuda.is_available() else None
+            state_dict = torch.load(weights_path, map_location=map_location, weights_only=True)
         if from_tf:
             return load_tf_weights_in_bert(model, weights_path)
         old_keys = []
diff --git a/monai/networks/nets/vista3d.py b/monai/networks/nets/vista3d.py
index 6ecb664b85..a5c2cc13ef 100644
--- a/monai/networks/nets/vista3d.py
+++ b/monai/networks/nets/vista3d.py
@@ -315,7 +315,7 @@ def set_auto_grad(self, auto_freeze: bool = False, point_freeze: bool = False):
        """
        if auto_freeze != self.auto_freeze:
            if hasattr(self.image_encoder, "set_auto_grad"):
-                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)
+                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)  # type: ignore[operator]
            else:
                for param in self.image_encoder.parameters():
                    param.requires_grad = (not auto_freeze) and (not point_freeze)
@@ -325,7 +325,7 @@ def set_auto_grad(self, auto_freeze: bool = False, point_freeze: bool = False):
 
        if point_freeze != self.point_freeze:
            if hasattr(self.image_encoder, "set_auto_grad"):
-                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)
+                self.image_encoder.set_auto_grad(auto_freeze=auto_freeze, point_freeze=point_freeze)  # type: ignore[operator]
            else:
                for param in self.image_encoder.parameters():
                    param.requires_grad = (not auto_freeze) and (not point_freeze)
@@ -543,10 +543,10 @@ def forward(
         point_embedding = self.pe_layer.forward_with_coords(points, out_shape)  # type: ignore
         point_embedding[point_labels == -1] = 0.0
         point_embedding[point_labels == -1] += self.not_a_point_embed.weight
-        point_embedding[point_labels == 0] += self.point_embeddings[0].weight
-        point_embedding[point_labels == 1] += self.point_embeddings[1].weight
-        point_embedding[point_labels == 2] += self.point_embeddings[0].weight + self.special_class_embed.weight
-        point_embedding[point_labels == 3] += self.point_embeddings[1].weight + self.special_class_embed.weight
+        point_embedding[point_labels == 0] += self.point_embeddings[0].weight  # type: ignore[arg-type]
+        point_embedding[point_labels == 1] += self.point_embeddings[1].weight  # type: ignore[arg-type]
+        point_embedding[point_labels == 2] += self.point_embeddings[0].weight + self.special_class_embed.weight  # type: ignore[operator]
+        point_embedding[point_labels == 3] += self.point_embeddings[1].weight + self.special_class_embed.weight  # type: ignore[operator]
         output_tokens = self.mask_tokens.weight
 
         output_tokens = output_tokens.unsqueeze(0).expand(point_embedding.size(0), -1, -1)
@@ -884,7 +884,7 @@ def _pe_encoding(self, coords: torch.torch.Tensor) -> torch.torch.Tensor:
         coords = 2 * coords - 1
         # [bs=1,N=2,2] @ [2,128]
         # [bs=1, N=2, 128]
-        coords = coords @ self.positional_encoding_gaussian_matrix
+        coords = coords @ self.positional_encoding_gaussian_matrix  # type: ignore[operator]
         coords = 2 * np.pi * coords
         # outputs d_1 x ... x d_n x C shape
         # [bs=1, N=2, 128+128=256]
diff --git a/monai/networks/utils.py b/monai/networks/utils.py
index 2279bed0b4..a41d4b1e33 100644
--- a/monai/networks/utils.py
+++ b/monai/networks/utils.py
@@ -22,7 +22,7 @@
 from collections.abc import Callable, Mapping, Sequence
 from contextlib import contextmanager
 from copy import deepcopy
-from typing import Any
+from typing import Any, Iterable
 
 import numpy as np
 import torch
@@ -1238,7 +1238,7 @@ def __init__(self, mod):
 
     def forward(self, x):
         dtype = x.dtype
-        with torch.amp.autocast("cuda", enabled=False):
+        with torch.autocast("cuda", enabled=False):
            ret = self.mod.forward(x.to(torch.float32)).to(dtype)
        return ret
 
@@ -1255,7 +1255,7 @@ def __init__(self, mod):
 
    def forward(self, *args):
        from_dtype = args[0].dtype
-        with torch.amp.autocast("cuda", enabled=False):
+        with torch.autocast("cuda", enabled=False):
            ret = self.mod.forward(*cast_all(args, from_dtype=from_dtype, to_dtype=torch.float32))
        return cast_all(ret, from_dtype=torch.float32, to_dtype=from_dtype)
 
@@ -1291,7 +1291,8 @@ def simple_replace(base_t: type[nn.Module], dest_t: type[nn.Module]) -> Callable
    def expansion_fn(mod: nn.Module) -> nn.Module | None:
        if not isinstance(mod, base_t):
            return None
-        args = [getattr(mod, name, None) for name in mod.__constants__]
+        constants: Iterable = mod.__constants__  # type: ignore[assignment]
+        args = [getattr(mod, name, None) for name in constants]
        out = dest_t(*args)
        return out
 
diff --git a/monai/transforms/intensity/array.py b/monai/transforms/intensity/array.py
index 8fe658ad3e..ed0a1ad9ac 100644
--- a/monai/transforms/intensity/array.py
+++ b/monai/transforms/intensity/array.py
@@ -1856,7 +1856,7 @@ def interp(self, x: NdarrayOrTensor, xp: NdarrayOrTensor, fp: NdarrayOrTensor) -
         indices = ns.searchsorted(xp.reshape(-1), x.reshape(-1)) - 1
         indices = ns.clip(indices, 0, len(m) - 1)
 
-        f = (m[indices] * x.reshape(-1) + b[indices]).reshape(x.shape)
+        f: NdarrayOrTensor = (m[indices] * x.reshape(-1) + b[indices]).reshape(x.shape)
         f[x < xp[0]] = fp[0]
         f[x > xp[-1]] = fp[-1]
         return f
diff --git a/monai/transforms/spatial/array.py b/monai/transforms/spatial/array.py
index e4ed196eff..a75bb390cd 100644
--- a/monai/transforms/spatial/array.py
+++ b/monai/transforms/spatial/array.py
@@ -1758,13 +1758,13 @@ def __call__(
         if self.affine is None:
             affine = torch.eye(spatial_dims + 1, device=_device)
             if self.rotate_params:
-                affine @= create_rotate(spatial_dims, self.rotate_params, device=_device, backend=_b)
+                affine @= create_rotate(spatial_dims, self.rotate_params, device=_device, backend=_b)  # type: ignore[assignment]
             if self.shear_params:
-                affine @= create_shear(spatial_dims, self.shear_params, device=_device, backend=_b)
+                affine @= create_shear(spatial_dims, self.shear_params, device=_device, backend=_b)  # type: ignore[assignment]
             if self.translate_params:
-                affine @= create_translate(spatial_dims, self.translate_params, device=_device, backend=_b)
+                affine @= create_translate(spatial_dims, self.translate_params, device=_device, backend=_b)  # type: ignore[assignment]
             if self.scale_params:
-                affine @= create_scale(spatial_dims, self.scale_params, device=_device, backend=_b)
+                affine @= create_scale(spatial_dims, self.scale_params, device=_device, backend=_b)  # type: ignore[assignment]
         else:
             affine = self.affine  # type: ignore
         affine = to_affine_nd(spatial_dims, affine)
@@ -1780,7 +1780,7 @@ def __call__(
             grid_ = ((affine @ sc) @ grid_.view((grid_.shape[0], -1))).view([-1] + list(grid_.shape[1:]))
         else:
             grid_ = (affine @ grid_.view((grid_.shape[0], -1))).view([-1] + list(grid_.shape[1:]))
-        return grid_, affine
+        return grid_, affine  # type: ignore[return-value]
 
 
 class RandAffineGrid(Randomizable, LazyTransform):
@@ -3257,7 +3257,7 @@ def filter_threshold(self, image_np: NdarrayOrTensor, locations: np.ndarray) ->
             tuple[NdarrayOrTensor, numpy.ndarray]: tuple of filtered patches and locations.
         """
         n_dims = len(image_np.shape)
-        idx = argwhere(image_np.sum(tuple(range(1, n_dims))) < self.threshold).reshape(-1)
+        idx = argwhere(image_np.sum(tuple(range(1, n_dims))) < self.threshold).reshape(-1)  # type: ignore[operator]
         idx_np = convert_data_type(idx, np.ndarray)[0]
 
         return image_np[idx], locations[idx_np]
diff --git a/monai/utils/state_cacher.py b/monai/utils/state_cacher.py
index 60a074544b..c59436525c 100644
--- a/monai/utils/state_cacher.py
+++ b/monai/utils/state_cacher.py
@@ -124,7 +124,7 @@ def retrieve(self, key: Hashable) -> Any:
         fn = self.cached[key]["obj"]  # pytype: disable=attribute-error
         if not os.path.exists(fn):  # pytype: disable=wrong-arg-types
             raise RuntimeError(f"Failed to load state in {fn}. File doesn't exist anymore.")
-        data_obj = torch.load(fn, map_location=lambda storage, location: storage)
+        data_obj = torch.load(fn, map_location=lambda storage, location: storage, weights_only=False)
         # copy back to device if necessary
         if "device" in self.cached[key]:
             data_obj = data_obj.to(self.cached[key]["device"])
diff --git a/requirements.txt b/requirements.txt
index 452a62adda..ad394ce807 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-torch>=2.3.0,<2.6; sys_platform != 'win32'
-torch>=2.4.1,<2.6; sys_platform == 'win32'
+torch>=2.3.0; sys_platform != 'win32'
+torch>=2.4.1; sys_platform == 'win32'
 numpy>=1.24,<3.0
diff --git a/runtests.sh b/runtests.sh
index 2a399d5c3a..fd7df79722 100755
--- a/runtests.sh
+++ b/runtests.sh
@@ -120,7 +120,7 @@ function print_usage {
 
 # FIXME: https://github.com/Project-MONAI/MONAI/issues/4354
 protobuf_major_version=$("${PY_EXE}" -m pip list | grep '^protobuf ' | tr -s ' ' | cut -d' ' -f2 | cut -d'.' -f1)
-if [ "$protobuf_major_version" -ge "4" ]
+if [ ! -z "$protobuf_major_version" ] && [ "$protobuf_major_version" -ge "4" ]
 then
     export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 fi
diff --git a/tests/bundle/test_bundle_download.py b/tests/bundle/test_bundle_download.py
index 38620d98ff..da58a6313e 100644
--- a/tests/bundle/test_bundle_download.py
+++ b/tests/bundle/test_bundle_download.py
@@ -266,6 +266,7 @@ def test_load_weights(self, bundle_files, bundle_name, repo, device, model_file)
         with skip_if_downloading_fails():
             # download bundle, and load weights from the downloaded path
             with tempfile.TemporaryDirectory() as tempdir:
+                bundle_root = os.path.join(tempdir, bundle_name)
                 # load weights
                 weights = load(
                     name=bundle_name,
@@ -278,7 +279,7 @@ def test_load_weights(self, bundle_files, bundle_name, repo, device, model_file)
                     return_state_dict=True,
                 )
                 # prepare network
-                with open(os.path.join(tempdir, bundle_name, bundle_files[2])) as f:
+                with open(os.path.join(bundle_root, bundle_files[2])) as f:
                     net_args = json.load(f)["network_def"]
                 model_name = net_args["_target_"]
                 del net_args["_target_"]
@@ -288,9 +289,13 @@ def test_load_weights(self, bundle_files, bundle_name, repo, device, model_file)
                 model.eval()
 
                 # prepare data and test
-                input_tensor = torch.load(os.path.join(tempdir, bundle_name, bundle_files[4]), map_location=device)
+                input_tensor = torch.load(
+                    os.path.join(bundle_root, bundle_files[4]), map_location=device, weights_only=True
+                )
                 output = model.forward(input_tensor)
-                expected_output = torch.load(os.path.join(tempdir, bundle_name, bundle_files[3]), map_location=device)
+                expected_output = torch.load(
+                    os.path.join(bundle_root, bundle_files[3]), map_location=device, weights_only=True
+                )
                 assert_allclose(output, expected_output, atol=1e-4, rtol=1e-4, type_test=False)
 
                 # load instantiated model directly and test, since the bundle has been downloaded,
@@ -350,7 +355,7 @@ def test_load_weights_with_net_override(self, bundle_name, device, net_override)
                 config_file=f"{tempdir}/spleen_ct_segmentation/configs/train.json", workflow_type="train"
             )
             expected_model = workflow.network_def.to(device)
-            expected_model.load_state_dict(torch.load(model_path))
+            expected_model.load_state_dict(torch.load(model_path, weights_only=True))
             expected_output = expected_model(input_tensor)
             assert_allclose(output, expected_output, atol=1e-4, rtol=1e-4, type_test=False)
 
@@ -378,6 +383,7 @@ def test_load_ts_module(self, bundle_files, bundle_name, version, repo, device,
         with skip_if_downloading_fails():
             # load ts module
             with tempfile.TemporaryDirectory() as tempdir:
+                bundle_root = os.path.join(tempdir, bundle_name)
                 # load ts module
                 model_ts, metadata, extra_file_dict = load(
                     name=bundle_name,
@@ -393,9 +399,13 @@ def test_load_ts_module(self, bundle_files, bundle_name, version, repo, device,
                 )
 
                 # prepare and test ts
-                input_tensor = torch.load(os.path.join(tempdir, bundle_name, bundle_files[1]), map_location=device)
+                input_tensor = torch.load(
+                    os.path.join(bundle_root, bundle_files[1]), map_location=device, weights_only=True
+                )
                 output = model_ts.forward(input_tensor)
-                expected_output = torch.load(os.path.join(tempdir, bundle_name, bundle_files[0]), map_location=device)
+                expected_output = torch.load(
+                    os.path.join(bundle_root, bundle_files[0]), map_location=device, weights_only=True
+                )
                 assert_allclose(output, expected_output, atol=1e-4, rtol=1e-4, type_test=False)
                 # test metadata
                 self.assertTrue(metadata["pytorch_version"] == "1.7.1")
diff --git a/tests/config/test_cv2_dist.py b/tests/config/test_cv2_dist.py
index 2ef8e5b10f..3bcb68e553 100644
--- a/tests/config/test_cv2_dist.py
+++ b/tests/config/test_cv2_dist.py
@@ -16,7 +16,6 @@
 import numpy as np
 import torch
 import torch.distributed as dist
-from torch.cuda.amp import autocast
 
 # FIXME: test for the workaround of https://github.com/Project-MONAI/MONAI/issues/5291
 from monai.config.deviceconfig import print_config
@@ -33,7 +32,7 @@ def main_worker(rank, ngpus_per_node, port):
         model, device_ids=[rank], output_device=rank, find_unused_parameters=False
     )
     x = torch.ones(1, 1, 12, 12, 12).to(rank)
-    with autocast(enabled=True):
+    with torch.autocast("cuda"):
         model(x)
 
     if dist.is_initialized():
diff --git a/tests/data/meta_tensor/test_meta_tensor.py b/tests/data/meta_tensor/test_meta_tensor.py
index cd3def4de1..f52d70e7b6 100644
--- a/tests/data/meta_tensor/test_meta_tensor.py
+++ b/tests/data/meta_tensor/test_meta_tensor.py
@@ -245,7 +245,7 @@ def test_pickling(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
             fname = os.path.join(tmp_dir, "im.pt")
             torch.save(m, fname)
-            m2 = torch.load(fname)
+            m2 = torch.load(fname, weights_only=False)
             self.check(m2, m, ids=False)
 
     @skip_if_no_cuda
@@ -256,7 +256,7 @@ def test_amp(self):
         conv = torch.nn.Conv2d(im.shape[1], 5, 3)
         conv.to(device)
         im_conv = conv(im)
-        with torch.cuda.amp.autocast():
+        with torch.autocast("cuda"):
             im_conv2 = conv(im)
         self.check(im_conv2, im_conv, ids=False, rtol=1e-2, atol=1e-2)
 
diff --git a/tests/integration/test_integration_classification_2d.py b/tests/integration/test_integration_classification_2d.py
index fd9e58aaf8..aecfa2efab 100644
--- a/tests/integration/test_integration_classification_2d.py
+++ b/tests/integration/test_integration_classification_2d.py
@@ -166,7 +166,7 @@ def run_inference_test(root_dir, test_x, test_y, device="cuda:0", num_workers=10
     model = DenseNet121(spatial_dims=2, in_channels=1, out_channels=len(np.unique(test_y))).to(device)
 
     model_filename = os.path.join(root_dir, "best_metric_model.pth")
-    model.load_state_dict(torch.load(model_filename))
+    model.load_state_dict(torch.load(model_filename, weights_only=True))
     y_true = []
     y_pred = []
     with eval_mode(model):
diff --git a/tests/integration/test_integration_fast_train.py b/tests/integration/test_integration_fast_train.py
index f9beb5613d..814c4b182c 100644
--- a/tests/integration/test_integration_fast_train.py
+++ b/tests/integration/test_integration_fast_train.py
@@ -186,7 +186,7 @@ def test_train_timing(self):
                 step += 1
                 optimizer.zero_grad()
                 # set AMP for training
-                with torch.cuda.amp.autocast():
+                with torch.autocast("cuda"):
                     outputs = model(batch_data["image"])
                     loss = loss_function(outputs, batch_data["label"])
                 scaler.scale(loss).backward()
@@ -207,7 +207,7 @@ def test_train_timing(self):
                         roi_size = (96, 96, 96)
                         sw_batch_size = 4
                         # set AMP for validation
-                        with torch.cuda.amp.autocast():
+                        with torch.autocast("cuda"):
                             val_outputs = sliding_window_inference(val_data["image"], roi_size, sw_batch_size, model)
                         val_outputs = [post_pred(i) for i in decollate_batch(val_outputs)]
 
diff --git a/tests/integration/test_integration_segmentation_3d.py b/tests/integration/test_integration_segmentation_3d.py
index fb2937739f..7c30150505 100644
--- a/tests/integration/test_integration_segmentation_3d.py
+++ b/tests/integration/test_integration_segmentation_3d.py
@@ -216,7 +216,7 @@ def run_inference_test(root_dir, device="cuda:0"):
     ).to(device)
 
     model_filename = os.path.join(root_dir, "best_metric_model.pth")
-    model.load_state_dict(torch.load(model_filename))
+    model.load_state_dict(torch.load(model_filename, weights_only=True))
     with eval_mode(model):
         # resampling with align_corners=True or dtype=float64 will generate
         # slight different results between PyTorch 1.5 an 1.6
diff --git a/tests/metrics/test_compute_multiscalessim_metric.py b/tests/metrics/test_compute_multiscalessim_metric.py
index 3df8026c2b..d85e6f7bf6 100644
--- a/tests/metrics/test_compute_multiscalessim_metric.py
+++ b/tests/metrics/test_compute_multiscalessim_metric.py
@@ -32,7 +32,7 @@ def test2d_gaussian(self):
         metric(preds, target)
         result = metric.aggregate()
         expected_value = 0.023176
-        self.assertTrue(expected_value - result.item() < 0.000001)
+        self.assertAlmostEqual(expected_value, result.item(), 4)
 
     def test2d_uniform(self):
         set_determinism(0)
@@ -45,7 +45,7 @@ def test2d_uniform(self):
         metric(preds, target)
         result = metric.aggregate()
         expected_value = 0.022655
-        self.assertTrue(expected_value - result.item() < 0.000001)
+        self.assertAlmostEqual(expected_value, result.item(), 4)
 
     def test3d_gaussian(self):
         set_determinism(0)
@@ -58,7 +58,7 @@ def test3d_gaussian(self):
         metric(preds, target)
         result = metric.aggregate()
         expected_value = 0.061796
-        self.assertTrue(expected_value - result.item() < 0.000001)
+        self.assertAlmostEqual(expected_value, result.item(), 4)
 
     def input_ill_input_shape2d(self):
         metric = MultiScaleSSIMMetric(spatial_dims=3, weights=[0.5, 0.5])
diff --git a/tests/networks/nets/test_autoencoderkl.py b/tests/networks/nets/test_autoencoderkl.py
index 0a3db60830..2d4c5b66ca 100644
--- a/tests/networks/nets/test_autoencoderkl.py
+++ b/tests/networks/nets/test_autoencoderkl.py
@@ -330,7 +330,7 @@ def test_compatibility_with_monai_generative(self):
             weight_path = os.path.join(tmpdir, filename)
             download_url(url=url, filepath=weight_path, hash_val=hash_val, hash_type=hash_type)
 
-            net.load_old_state_dict(torch.load(weight_path), verbose=False)
+            net.load_old_state_dict(torch.load(weight_path, weights_only=True), verbose=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/networks/nets/test_controlnet.py b/tests/networks/nets/test_controlnet.py
index 9503518762..6158dc2eef 100644
--- a/tests/networks/nets/test_controlnet.py
+++ b/tests/networks/nets/test_controlnet.py
@@ -208,7 +208,7 @@ def test_compatibility_with_monai_generative(self):
            weight_path = os.path.join(tmpdir, filename)
            download_url(url=url, filepath=weight_path, hash_val=hash_val, hash_type=hash_type)
 
-            net.load_old_state_dict(torch.load(weight_path), verbose=False)
+            net.load_old_state_dict(torch.load(weight_path, weights_only=True), verbose=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/networks/nets/test_diffusion_model_unet.py b/tests/networks/nets/test_diffusion_model_unet.py
index a7c823709d..3bca26882c 100644
--- a/tests/networks/nets/test_diffusion_model_unet.py
+++ b/tests/networks/nets/test_diffusion_model_unet.py
@@ -578,7 +578,7 @@ def test_compatibility_with_monai_generative(self):
            weight_path = os.path.join(tmpdir, filename)
            download_url(url=url, filepath=weight_path, hash_val=hash_val, hash_type=hash_type)
 
-            net.load_old_state_dict(torch.load(weight_path), verbose=False)
+            net.load_old_state_dict(torch.load(weight_path, weights_only=True), verbose=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/networks/nets/test_network_consistency.py b/tests/networks/nets/test_network_consistency.py
index e09826de75..4ce198b92f 100644
--- a/tests/networks/nets/test_network_consistency.py
+++ b/tests/networks/nets/test_network_consistency.py
@@ -55,7 +55,7 @@ def test_network_consistency(self, net_name, data_path, json_path):
         print("JSON path: " + json_path)
 
         # Load data
-        loaded_data = torch.load(data_path)
+        loaded_data = torch.load(data_path, weights_only=True)
 
         # Load json from file
         json_file = open(json_path)
diff --git a/tests/networks/nets/test_swin_unetr.py b/tests/networks/nets/test_swin_unetr.py
index 4908907bfe..2c4532ecc4 100644
--- a/tests/networks/nets/test_swin_unetr.py
+++ b/tests/networks/nets/test_swin_unetr.py
@@ -128,7 +128,7 @@ def test_filter_swinunetr(self, input_param, key, value):
             data_spec["url"], weight_path, hash_val=data_spec["hash_val"], hash_type=data_spec["hash_type"]
         )
 
-        ssl_weight = torch.load(weight_path)["model"]
+        ssl_weight = torch.load(weight_path, weights_only=True)["model"]
         net = SwinUNETR(**input_param)
         dst_dict, loaded, not_loaded = copy_model_state(net, ssl_weight, filter_func=filter_swinunetr)
         assert_allclose(dst_dict[key][:8], value, atol=1e-4, rtol=1e-4, type_test=False)
diff --git a/tests/networks/nets/test_transformer.py b/tests/networks/nets/test_transformer.py
index f9264ba153..daf424c174 100644
--- a/tests/networks/nets/test_transformer.py
+++ b/tests/networks/nets/test_transformer.py
@@ -101,7 +101,7 @@ def test_compatibility_with_monai_generative(self):
            weight_path = os.path.join(tmpdir, filename)
            download_url(url=url, filepath=weight_path, hash_val=hash_val, hash_type=hash_type)
 
-            net.load_old_state_dict(torch.load(weight_path), verbose=False)
+            net.load_old_state_dict(torch.load(weight_path, weights_only=True), verbose=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/networks/test_save_state.py b/tests/networks/test_save_state.py
index 0581a3ce1f..329065da2b 100644
--- a/tests/networks/test_save_state.py
+++ b/tests/networks/test_save_state.py
@@ -64,7 +64,7 @@ def test_file(self, src, expected_keys, create_dir=True, atomic=True, func=None,
         if kwargs is None:
             kwargs = {}
         save_state(src=src, path=path, create_dir=create_dir, atomic=atomic, func=func, **kwargs)
-        ckpt = dict(torch.load(path))
+        ckpt = dict(torch.load(path, weights_only=True))
         for k in ckpt.keys():
             self.assertIn(k, expected_keys)
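A closing note on the call sites that keep or gain `weights_only=False` (`CheckpointLoader`, `StateCacher`, the persistent dataset caches, and the MetaTensor pickling test): those files hold full Python objects rather than bare tensors, and restricted unpickling rejects custom classes unless they are allowlisted via `torch.serialization.add_safe_globals`. A sketch of the failure mode, assuming a trusted local file:

import torch
from monai.data import MetaTensor

m = MetaTensor(torch.zeros(2, 2))
torch.save(m, "im.pt")  # pickles a custom Tensor subclass

try:
    torch.load("im.pt", weights_only=True)  # MetaTensor is not allowlisted
except Exception:
    pass  # restricted unpickling refuses custom classes by default

m2 = torch.load("im.pt", weights_only=False)  # acceptable only for trusted files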