AbstractPhil committed on
Commit
896c974
·
verified ·
1 Parent(s): 6534798

trainer to finish the next 10 epochs barring major errors

Browse files
Files changed (1) hide show
  1. trainer_v2.py +32 -23
trainer_v2.py CHANGED
@@ -51,7 +51,7 @@ class BaseConfig:
51
  pooling: str = "mean"
52
 
53
  # Flow training
54
- epochs: int = 10
55
  lr: float = 1e-4
56
  weight_decay: float = 1e-3
57
  grad_clip: float = 1.0
@@ -964,35 +964,44 @@ class FlowMatchDavidTrainer:
964
  steps = steps or self.cfg.sample_steps
965
  guidance = guidance if guidance is not None else self.cfg.guidance_scale
966
 
967
- # Get model dtype from student
968
- model_dtype = next(self.student.unet.parameters()).dtype
 
969
 
970
- cond_e = self.teacher.encode(prompts)
971
- uncond_e = self.teacher.encode([""]*len(prompts))
972
- sched = self.teacher.sched
973
- sched.set_timesteps(steps, device=self.device)
974
-
975
- # Create latents with correct dtype
976
- x_t = torch.randn(len(prompts), 4, 64, 64, device=self.device, dtype=model_dtype)
 
 
 
977
 
978
- for t_scalar in sched.timesteps:
979
- t = torch.full((x_t.shape[0],), t_scalar, device=self.device, dtype=torch.long)
980
- v_u, _ = self.student(x_t, t, uncond_e)
981
- v_c, _ = self.student(x_t, t, cond_e)
982
- v_hat = v_u + guidance*(v_c - v_u)
983
 
984
- alpha, sigma = self.teacher.alpha_sigma(t)
985
- denom = (alpha**2 + sigma**2)
986
- x0_hat = (alpha * x_t - sigma * v_hat) / (denom + 1e-8)
987
- eps_hat = (x_t - alpha * x0_hat) / (sigma + 1e-8)
988
 
989
- step = sched.step(model_output=eps_hat, timestep=t_scalar, sample=x_t)
990
- x_t = step.prev_sample
991
 
992
- imgs = self.teacher.pipe.vae.decode(x_t / 0.18215).sample
 
 
 
 
 
 
993
  return imgs.clamp(-1,1)
994
 
995
-
996
  # =====================================================================================
997
  # 9) MAIN
998
  # =====================================================================================
 
51
  pooling: str = "mean"
52
 
53
  # Flow training
54
+ epochs: int = 20
55
  lr: float = 1e-4
56
  weight_decay: float = 1e-3
57
  grad_clip: float = 1.0
 
964
  steps = steps or self.cfg.sample_steps
965
  guidance = guidance if guidance is not None else self.cfg.guidance_scale
966
 
967
+ # Ensure student is in eval mode
968
+ was_training = self.student.training
969
+ self.student.eval()
970
 
971
+ # Use autocast to handle dtype conversions automatically
972
+ with torch.cuda.amp.autocast(enabled=self.cfg.amp):
973
+ cond_e = self.teacher.encode(prompts)
974
+ uncond_e = self.teacher.encode([""]*len(prompts))
975
+
976
+ sched = self.teacher.sched
977
+ sched.set_timesteps(steps, device=self.device)
978
+
979
+ # Create latents (autocast will handle dtype)
980
+ x_t = torch.randn(len(prompts), 4, 64, 64, device=self.device)
981
 
982
+ for t_scalar in sched.timesteps:
983
+ t = torch.full((x_t.shape[0],), t_scalar, device=self.device, dtype=torch.long)
984
+ v_u, _ = self.student(x_t, t, uncond_e)
985
+ v_c, _ = self.student(x_t, t, cond_e)
986
+ v_hat = v_u + guidance*(v_c - v_u)
987
 
988
+ alpha, sigma = self.teacher.alpha_sigma(t)
989
+ denom = (alpha**2 + sigma**2)
990
+ x0_hat = (alpha * x_t - sigma * v_hat) / (denom + 1e-8)
991
+ eps_hat = (x_t - alpha * x0_hat) / (sigma + 1e-8)
992
 
993
+ step = sched.step(model_output=eps_hat, timestep=t_scalar, sample=x_t)
994
+ x_t = step.prev_sample
995
 
996
+ # Decode (keep x_t at current dtype for VAE)
997
+ imgs = self.teacher.pipe.vae.decode(x_t / 0.18215).sample
998
+
999
+ # Restore training mode
1000
+ if was_training:
1001
+ self.student.train()
1002
+
1003
  return imgs.clamp(-1,1)
1004
 
 
1005
  # =====================================================================================
1006
  # 9) MAIN
1007
  # =====================================================================================