prithivMLmods committed
Commit fdd9313 · verified · 1 Parent(s): ee4b2e1

Update app.py

Files changed (1): app.py (+30, -19)
app.py CHANGED
@@ -1,4 +1,10 @@
 import os
+import gc
+
+# 1. FIX: Set memory allocation configuration BEFORE importing torch
+# 'expandable_segments:True' prevents the specific CUDACachingAllocator assertion failure
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
 import gradio as gr
 import numpy as np
 import spaces
@@ -9,7 +15,7 @@ from typing import Iterable
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
 
-# --- Theme Configuration ---
+# Define Theme
 colors.orange_red = colors.Color(
     name="orange_red",
     c50="#FFF0E5",
@@ -78,7 +84,6 @@ class OrangeRedTheme(Soft):
 
 orange_red_theme = OrangeRedTheme()
 
-# --- Device Setup ---
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype = torch.bfloat16
 
@@ -87,7 +92,6 @@ from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
 from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
 from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
 
-# --- Model Loading ---
 print("Loading Qwen Image Edit Pipeline...")
 pipe = QwenImageEditPlusPipeline.from_pretrained(
     "Qwen/Qwen-Image-Edit-2509",
@@ -100,6 +104,13 @@ pipe = QwenImageEditPlusPipeline.from_pretrained(
     torch_dtype=dtype
 ).to(device)
 
+# 2. FIX: Enable VAE Tiling. This is crucial for decoding large images without OOM.
+try:
+    pipe.enable_vae_tiling()
+    print("VAE Tiling enabled.")
+except Exception as e:
+    print(f"Warning: Could not enable VAE tiling: {e}")
+
 print("Loading and Fusing Lightning LoRA...")
 pipe.load_lora_weights("lightx2v/Qwen-Image-Lightning",
     weight_name="Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors",
@@ -173,6 +184,10 @@ def infer(
     steps,
     progress=gr.Progress(track_tqdm=True)
 ):
+    # 3. FIX: Aggressive Garbage Collection before run
+    gc.collect()
+    torch.cuda.empty_cache()
+
     if image_1 is None or image_2 is None:
         raise gr.Error("Please upload both images for Fusion/Texture/FaceSwap tasks.")
 
@@ -217,13 +232,9 @@
 
     width, height = update_dimensions_on_upload(img1_pil)
 
-    # --- Fix: Explicit Memory Management ---
-    # Clear cache before starting the heavy inference process
-    torch.cuda.empty_cache()
-
     try:
-        # Use no_grad to prevent gradient calculation and save memory
-        with torch.no_grad():
+        # 3. FIX: Use inference_mode for better memory efficiency
+        with torch.inference_mode():
             result = pipe(
                 image=[img1_pil, img2_pil],
                 prompt=prompt,
@@ -234,24 +245,24 @@
                 generator=generator,
                 true_cfg_scale=guidance_scale,
             ).images[0]
+
+        return result, seed
+
     except Exception as e:
-        # If an error occurs, ensure we still clear cache before raising
-        torch.cuda.empty_cache()
+        # Rethrow so Gradio sees the error, but allow finally block to run
        raise e
-
-    # Clear cache after inference is done
-    torch.cuda.empty_cache()
-
-    return result, seed
+
+    finally:
+        # 3. FIX: Cleanup after run regardless of success or failure
+        gc.collect()
+        torch.cuda.empty_cache()
 
 @spaces.GPU
 def infer_example(image_1, image_2, prompt, lora_adapter):
     if image_1 is None or image_2 is None:
         return None, 0
 
-    # Optional: Clear cache before example inference as well
-    torch.cuda.empty_cache()
-
+    # Simple wrapper call
     result, seed = infer(
         image_1.convert("RGB"),
         image_2.convert("RGB"),
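A note on fix 1: PYTORCH_CUDA_ALLOC_CONF is read when PyTorch's CUDA caching allocator initializes, which happens lazily at the first CUDA allocation, so setting the variable before `import torch` is the conservative way to guarantee it lands in time. A minimal standalone sketch of the ordering (the probe tensor and `memory_summary` call are illustrative additions, not part of the commit):

```python
import os

# Must be in place before the CUDA caching allocator initializes, i.e.
# before the first CUDA allocation -- setting it before `import torch`
# is the safe ordering.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch

if torch.cuda.is_available():
    _ = torch.zeros(1, device="cuda")  # first allocation: the config takes effect here
    print(torch.cuda.memory_summary(abbreviated=True))
```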
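Fix 2 trades a little decode speed for peak memory: with tiling enabled, the VAE decodes the latents in overlapping tiles instead of materializing the full-resolution image's activations at once. The try/except guard exists because `enable_vae_tiling()` is a diffusers pipeline convention that a custom pipeline such as QwenImageEditPlusPipeline may not implement. The same guard can be written with an explicit `hasattr` check, as in this sketch (the helper name is hypothetical):

```python
def enable_vae_tiling_if_supported(pipe) -> bool:
    """Enable tiled VAE decoding when the pipeline supports it.

    Hypothetical helper: `pipe` stands in for any diffusers-style pipeline.
    """
    if not hasattr(pipe, "enable_vae_tiling"):
        return False
    try:
        pipe.enable_vae_tiling()
        return True
    except Exception as e:
        # Keep the app booting even when tiling cannot be enabled.
        print(f"Warning: Could not enable VAE tiling: {e}")
        return False
```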
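For fix 3, `torch.inference_mode()` does everything `torch.no_grad()` does and additionally marks tensors created inside the block as inference tensors, skipping autograd's version-counter and view bookkeeping. The cost is flexibility: inference tensors cannot participate in autograd later, which is irrelevant in a pure-inference handler like this one. A quick illustration:

```python
import torch

with torch.no_grad():
    a = torch.ones(3)

with torch.inference_mode():
    b = torch.ones(3)

print(a.is_inference())  # False: ordinary tensor, gradient recording merely off
print(b.is_inference())  # True: exempt from autograd tracking entirely
# In-place updates to an inference tensor outside inference mode raise a
# RuntimeError, so keep such tensors inside the inference path.
```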
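Fix 3's cleanup discipline pairs `gc.collect()` with `torch.cuda.empty_cache()`: empty_cache can only return cached blocks that Python no longer references, so collecting first matters, and the `finally` block makes the cleanup run whether inference succeeds or raises. Since the same before/after pattern now brackets the handler, it could be factored into a decorator; a sketch under that assumption (`cuda_cleanup` is hypothetical, not part of the commit):

```python
import functools
import gc

import torch

def cuda_cleanup(fn):
    """Hypothetical decorator mirroring the commit's before/after cleanup."""
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        try:
            return fn(*args, **kwargs)
        finally:
            # Runs on success and on error, like the commit's finally block.
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
    return wrapper
```

Applied as `@cuda_cleanup` above `infer`, this would replace the inline calls without changing behavior.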