From 51e7ee4b61c12d73bbca869e938fca748c492ef2 Mon Sep 17 00:00:00 2001
From: Yorick van Pelt
Date: Thu, 10 Oct 2024 17:25:28 +0200
Subject: [PATCH] Check video memory to decide when to offload

---
 predict.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/predict.py b/predict.py
index 6396154..60e29ef 100644
--- a/predict.py
+++ b/predict.py
@@ -165,7 +165,10 @@ def base_setup(
         self.falcon_processor = ViTImageProcessor.from_pretrained(FALCON_MODEL_NAME)
 
         # need > 48 GB of ram to store all models in VRAM
-        self.offload = "A40" in gpu_name
+        total_mem = torch.cuda.get_device_properties(0).total_memory
+        self.offload = total_mem < 48 * 1024**3
+        if self.offload:
+            print(f"GPU memory is {total_mem / 1024**3:.1f} GiB, offloading models")
         device = "cuda"
         max_length = 256 if self.flow_model_name == "flux-schnell" else 512
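
Note: for readers who want to reuse this heuristic outside predict.py, here is a minimal standalone sketch. It assumes a CUDA build of PyTorch; the function name should_offload, the threshold_gib parameter, and the device_index default are illustrative, not part of the patch:

    import torch

    def should_offload(threshold_gib: float = 48.0, device_index: int = 0) -> bool:
        # get_device_properties().total_memory reports VRAM in bytes;
        # offload when the card has less than the threshold (in GiB).
        total_mem = torch.cuda.get_device_properties(device_index).total_memory
        return total_mem < threshold_gib * 1024**3

Unlike the old '"A40" in gpu_name' test, which only recognized one specific card, the capacity check generalizes to any GPU: a 24 GiB RTX 4090 would offload, while an 80 GiB A100 keeps all models resident in VRAM.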