File size: 3,740 Bytes
aaafaf0
454ba5e
b6ed610
454ba5e
 
b232019
4fc2b33
454ba5e
 
b6ed610
454ba5e
 
 
a205585
 
 
 
6d99887
cb0b907
077eede
6d99887
 
e64ed84
b232019
b6ed610
 
6d99887
 
 
 
 
 
c8737e5
4ee3e9f
d50e969
b6ed610
 
759c1b8
0814a7a
6d99887
759c1b8
6d99887
759c1b8
 
 
6d99887
1543860
11b62ea
b6ed610
cb0b907
 
 
 
6d99887
 
cb0b907
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759c1b8
6d99887
 
 
 
 
759c1b8
b6ed610
 
 
6d99887
759c1b8
6d99887
759c1b8
 
d50e969
 
1c7e849
6d99887
d50e969
6d99887
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
953168d
759c1b8
6d99887
 
 
 
 
 
 
759c1b8
6d99887
 
953168d
 
6d99887
953168d
454ba5e
7a633fa
cadbb5a
d50e969
c932701
d50e969
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import copy
import math
import random
import os
import tempfile
import sys

import numpy as np
import torch
from PIL import Image
import gradio as gr
import spaces

import subprocess

# Access token for the wand-ml GitHub repository, read from the environment.
# It is None when the variable is not set.
GIT_TOKEN = os.environ.get("GIT_TOKEN")

# Only embed credentials when a token is actually configured: formatting a
# missing token into the URL would send the literal string "None".
_wand_ml_auth = f"eleazhong:{GIT_TOKEN}@" if GIT_TOKEN else ""

# Install the dependency at startup, before the imports that follow.
# - An argument list (no shell) keeps the token from ever being parsed by a
#   shell, so special characters in it cannot alter the command.
# - `sys.executable -m pip` installs into the interpreter running this script
#   rather than whichever `pip` happens to be first on PATH.
# - GIT_TERMINAL_PROMPT=0 makes git fail instead of waiting for interactive
#   credentials when none (or wrong ones) are supplied.
# The return code is intentionally not checked, matching the original
# best-effort behaviour.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        f"git+https://{_wand_ml_auth}github.com/wand-ai/wand-ml",
    ],
    env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
)

from qwenimage.datamodels import QwenConfig
from qwenimage.debug import ctimed, ftimed
from qwenimage.experiments.experiments_qwen import ExperimentRegistry
from qwenimage.finetuner import QwenLoraFinetuner
from qwenimage.foundation import QwenImageFoundation
from qwenimage.prompt import build_camera_prompt

# --- Model Loading ---
# Everything below runs once at import time and creates the module-level
# `foundation` and `finetuner` objects that `run_pipe` uses on every request.

# Build the foundation model from an explicit config.
# NOTE(review): vae_image_size looks like a pixel budget (1024 x 1024) and
# regression_base_pipe_steps like a step count for the base pipeline --
# confirm against QwenConfig.
foundation = QwenImageFoundation(QwenConfig(
    vae_image_size=1024 * 1024,
    regression_base_pipe_steps=4,
))
# Wrap the foundation in a LoRA finetuner that shares the foundation's config,
# then load weights from the checkpoint path with lora_rank=32.
# NOTE(review): the rank presumably has to match the one used when the
# checkpoint was trained -- verify.
finetuner = QwenLoraFinetuner(foundation, foundation.config)
finetuner.load("checkpoints/reg-mse-pixel-lpips_005000", lora_rank=32)



# Upper bound for both the seed slider and randomly drawn seeds: the largest
# value representable as a signed 32-bit integer.
MAX_SEED = np.iinfo(np.dtype("int32")).max


def _as_rgb_image(source):
    """Return ``source`` as an RGB PIL image, or ``None`` if it is unsupported.

    Accepts a PIL image, a NumPy array (what a ``gr.Image`` component left at
    its default ``type`` delivers), or an object exposing a ``name`` file path.
    """
    if isinstance(source, Image.Image):
        return source.convert("RGB")
    if isinstance(source, np.ndarray):
        return Image.fromarray(source).convert("RGB")
    if hasattr(source, "name"):
        # convert() returns a new in-memory image, so the file handle can be
        # closed as soon as the conversion is done.
        with Image.open(source.name) as opened:
            return opened.convert("RGB")
    return None


@spaces.GPU
def run_pipe(
    image,
    prompt,
    seed,
    randomize_seed,
    num_inference_steps,
    shift,
    prev_output=None,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the base pipeline on one input image and return the result.

    Args:
        image: Uploaded input image (PIL image, NumPy array, or an object with
            a ``name`` path). Takes precedence over ``prev_output``.
        prompt: Text prompt forwarded to the pipeline.
        seed: Seed for the torch generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        num_inference_steps: Number of inference steps forwarded to the
            pipeline.
        shift: Value written to both ``base_shift`` and ``max_shift`` of the
            scheduler config before the run.
        prev_output: Previous output image, used only when ``image`` is None.
        progress: Gradio progress tracker (tqdm tracking enabled); not
            referenced directly in the body.

    Returns:
        A ``(result, seed)`` tuple: the first element of the pipeline output
        and the seed that was actually used.

    Raises:
        gr.Error: If neither ``image`` nor ``prev_output`` yields a usable
            image.
    """
    with ctimed("pre pipe"):
        # Slider values may arrive as floats; manual_seed() needs an int.
        seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        generator = torch.Generator(device=device).manual_seed(seed)

        # Choose input image (prefer uploaded, else last output). Compare
        # against None explicitly: truth-testing a NumPy array raises.
        source = image if image is not None else prev_output
        rgb_image = _as_rgb_image(source)
        pil_images = [] if rgb_image is None else [rgb_image]

        if not pil_images:
            raise gr.Error("Please upload an image first.")

        print(f"{len(pil_images)=}")

    # NOTE(review): these mutate module-level state shared by all requests
    # (LoRA enablement and scheduler shift) -- confirm this is safe under
    # concurrent use.
    finetuner.enable()
    foundation.scheduler.config["base_shift"] = shift
    foundation.scheduler.config["max_shift"] = shift

    result = foundation.base_pipe(foundation.INPUT_MODEL(
        image=pil_images,
        prompt=prompt,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
    ))[0]

    return result, seed


# --- UI ---


# Build the Gradio interface: inputs and settings in the left column, the
# generated image in the right column.
with gr.Blocks(theme=gr.themes.Citrus()) as demo:

    gr.Markdown("Qwen Image Demo")

    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Input Image", type="pil")
            # Hidden holder for the last generated image. It must deliver a
            # PIL image: run_pipe calls .convert("RGB") on it, and the
            # component's default type would hand over a NumPy array instead.
            prev_output = gr.Image(value=None, visible=False, type="pil")
            # Hidden flag; not wired to any event in this file.
            is_reset = gr.Checkbox(value=False, visible=False)
            prompt = gr.Textbox(label="Prompt", placeholder="Prompt", lines=2)

            run_btn = gr.Button("Generate", variant="primary")

            with gr.Accordion("Advanced Settings", open=False):
                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=2)
                shift = gr.Slider(label="Timestep Shift", minimum=0.0, maximum=4.0, step=0.1, value=2.0)

        with gr.Column():
            result = gr.Image(label="Output Image", interactive=False)

    # Component order must match run_pipe's positional parameters.
    inputs = [
        image,
        prompt,
        seed,
        randomize_seed,
        num_inference_steps,
        shift,
        prev_output,
    ]
    # run_pipe returns (result, seed); the seed slider is updated so the user
    # can see and reuse a randomly drawn seed.
    outputs = [result, seed]

    run_event = run_btn.click(
        fn=run_pipe,
        inputs=inputs,
        outputs=outputs
    )

    # After each run, copy the new output into the hidden holder so the next
    # run can fall back to it when no input image is supplied.
    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

demo.launch()