From 51e7ee4b61c12d73bbca869e938fca748c492ef2 Mon Sep 17 00:00:00 2001
From: Yorick van Pelt
Date: Thu, 10 Oct 2024 17:25:28 +0200
Subject: [PATCH] Check video memory to decide when to offload

---
 predict.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/predict.py b/predict.py
index 6396154..60e29ef 100644
--- a/predict.py
+++ b/predict.py
@@ -165,7 +165,10 @@ def base_setup(
         self.falcon_processor = ViTImageProcessor.from_pretrained(FALCON_MODEL_NAME)
 
         # need > 48 GB of ram to store all models in VRAM
-        self.offload = "A40" in gpu_name
+        total_mem = torch.cuda.get_device_properties(0).total_memory
+        self.offload = total_mem < 48 * 1024**3
+        if self.offload:
+            print(f"GPU memory is {total_mem / 1024**3:.1f} GiB, offloading models")
         device = "cuda"
         max_length = 256 if self.flow_model_name == "flux-schnell" else 512
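
Note: for readers who want to reuse this heuristic outside predict.py, here is a minimal standalone sketch. It assumes a CUDA build of PyTorch; the function name should_offload, the threshold_gib parameter, and the device_index default are illustrative, not part of the patch:

    import torch

    def should_offload(threshold_gib: float = 48.0, device_index: int = 0) -> bool:
        # get_device_properties().total_memory reports VRAM in bytes;
        # offload when the card has less than the threshold (in GiB).
        total_mem = torch.cuda.get_device_properties(device_index).total_memory
        return total_mem < threshold_gib * 1024**3

Unlike the old '"A40" in gpu_name' test, which only recognized one specific card, the capacity check generalizes to any GPU: a 24 GiB RTX 4090 would offload, while an 80 GiB A100 keeps all models resident in VRAM.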