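Fix issues found in review

- fine_tune.py: pass "mean" instead of "none" to train_util.conditional_loss in the else branch
- train_util.get_timesteps: sample timesteps on the CPU, then move them to the target device
- train_util.get_huber_threshold: use double quotes and drop the f-string prefix from a message with no placeholders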

recris · recris · commit 740ec1d5265f · 2024-11-28T20:38:32.000Z
diff --git a/fine_tune.py b/fine_tune.py
@@ -411,7 +411,7 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):
                     loss = loss.mean()  # mean over batch dimension
                 else:
                     loss = train_util.conditional_loss(
-                        args, noise_pred.float(), target.float(), timesteps, "none", noise_scheduler
+                        args, noise_pred.float(), target.float(), timesteps, "mean", noise_scheduler
                     )
 
                 accelerator.backward(loss)
diff --git a/library/train_util.py b/library/train_util.py
@@ -5829,8 +5829,8 @@ def save_sd_model_on_train_end_common(
 
 
 def get_timesteps(min_timestep, max_timestep, b_size, device):
-    timesteps = torch.randint(min_timestep, max_timestep, (b_size,), device=device)
-    timesteps = timesteps.long()
+    timesteps = torch.randint(min_timestep, max_timestep, (b_size,), device="cpu")
+    timesteps = timesteps.long().to(device)
     return timesteps
 
 
@@ -5875,8 +5875,8 @@ def get_huber_threshold(args, timesteps: torch.Tensor, noise_scheduler) -> torch
         alpha = -math.log(args.huber_c) / noise_scheduler.config.num_train_timesteps
         result = torch.exp(-alpha * timesteps) * args.huber_scale
     elif args.huber_schedule == "snr":
-        if not hasattr(noise_scheduler, 'alphas_cumprod'):
-            raise NotImplementedError(f"Huber schedule 'snr' is not supported with the current model.")
+        if not hasattr(noise_scheduler, "alphas_cumprod"):
+            raise NotImplementedError("Huber schedule 'snr' is not supported with the current model.")
         alphas_cumprod = torch.index_select(noise_scheduler.alphas_cumprod, 0, timesteps.cpu())
         sigmas = ((1.0 - alphas_cumprod) / alphas_cumprod) ** 0.5
         result = (1 - args.huber_c) / (1 + sigmas) ** 2 + args.huber_c

Original file line number	Diff line number	Diff line change
`@@ -411,7 +411,7 @@ def fn_recursive_set_mem_eff(module: torch.nn.Module):`
`411`	`411`	`loss = loss.mean() # mean over batch dimension`
`412`	`412`	`else:`
`413`	`413`	`loss = train_util.conditional_loss(`
`414`		`- args, noise_pred.float(), target.float(), timesteps, "none", noise_scheduler`
	`414`	`+ args, noise_pred.float(), target.float(), timesteps, "mean", noise_scheduler`
`415`	`415`	`)`
`416`	`416`
`417`	`417`	`accelerator.backward(loss)`