Elea Zhong committed on
Commit
454ba5e
·
1 Parent(s): 242b3f5

simplify code

Browse files
Files changed (9) hide show
  1. app.py +17 -132
  2. disaster_girl.jpg +0 -3
  3. grumpy.png +0 -3
  4. metropolis.jpg +0 -0
  5. monkey.jpg +0 -3
  6. prompt.py +30 -0
  7. requirements.txt +2 -2
  8. tool_of_the_sea.png +0 -3
  9. wednesday.png +0 -3
app.py CHANGED
@@ -1,26 +1,23 @@
1
- import gradio as gr
2
- import numpy as np
3
  import random
4
- import torch
5
- import spaces
6
 
 
 
7
  from PIL import Image
 
 
 
8
  from diffusers import FlowMatchEulerDiscreteScheduler
 
 
 
9
  from optimization import optimize_pipeline_
10
  from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
11
  from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
  from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
13
-
14
- import math
15
- from huggingface_hub import hf_hub_download
16
- from safetensors.torch import load_file
17
-
18
- from PIL import Image
19
- import os
20
- import gradio as gr
21
- from gradio_client import Client, handle_file
22
- import tempfile
23
-
24
 
25
  # --- Model Loading ---
26
  dtype = torch.bfloat16
@@ -47,7 +44,6 @@ pipe.fuse_lora(adapter_names=["angles"], lora_scale=1.25)
47
  pipe.unload_lora_weights()
48
 
49
 
50
-
51
  pipe.transformer.__class__ = QwenImageTransformer2DModel
52
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
53
 
@@ -56,48 +52,6 @@ optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB",
56
 
57
  MAX_SEED = np.iinfo(np.int32).max
58
 
59
- def _generate_video_segment(input_image_path: str, output_image_path: str, prompt: str, request: gr.Request) -> str:
60
- """Generates a single video segment using the external service."""
61
- x_ip_token = request.headers['x-ip-token']
62
- video_client = Client("multimodalart/wan-2-2-first-last-frame", headers={"x-ip-token": x_ip_token})
63
- result = video_client.predict(
64
- start_image_pil=handle_file(input_image_path),
65
- end_image_pil=handle_file(output_image_path),
66
- prompt=prompt, api_name="/generate_video",
67
- )
68
- return result[0]["video"]
69
-
70
- def build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle):
71
- prompt_parts = []
72
-
73
- # Rotation
74
- if rotate_deg != 0:
75
- direction = "left" if rotate_deg > 0 else "right"
76
- if direction == "left":
77
- prompt_parts.append(f"将镜头向左旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the left.")
78
- else:
79
- prompt_parts.append(f"将镜头向右旋转{abs(rotate_deg)}度 Rotate the camera {abs(rotate_deg)} degrees to the right.")
80
-
81
-
82
- # Move forward / close-up
83
- if move_forward > 5:
84
- prompt_parts.append("将镜头转为特写镜头 Turn the camera to a close-up.")
85
- elif move_forward >= 1:
86
- prompt_parts.append("将镜头向前移动 Move the camera forward.")
87
-
88
- # Vertical tilt
89
- if vertical_tilt <= -1:
90
- prompt_parts.append("将相机转向鸟瞰视角 Turn the camera to a bird's-eye view.")
91
- elif vertical_tilt >= 1:
92
- prompt_parts.append("将相机切换到仰视视角 Turn the camera to a worm's-eye view.")
93
-
94
- # Lens option
95
- if wideangle:
96
- prompt_parts.append(" 将镜头转为广角镜头 Turn the camera to a wide-angle lens.")
97
-
98
- final_prompt = " ".join(prompt_parts).strip()
99
- return final_prompt if final_prompt else "no camera movement"
100
-
101
 
102
  @spaces.GPU
103
  def infer_camera_edit(
@@ -150,32 +104,6 @@ def infer_camera_edit(
150
 
151
  return result, seed, prompt
152
 
153
- def create_video_between_images(input_image, output_image, prompt: str, request: gr.Request) -> str:
154
- """Create a video between the input and output images."""
155
- if input_image is None or output_image is None:
156
- raise gr.Error("Both input and output images are required to create a video.")
157
-
158
- try:
159
-
160
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
161
- input_image.save(tmp.name)
162
- input_image_path = tmp.name
163
-
164
- output_pil = Image.fromarray(output_image.astype('uint8'))
165
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
166
- output_pil.save(tmp.name)
167
- output_image_path = tmp.name
168
-
169
- video_path = _generate_video_segment(
170
- input_image_path,
171
- output_image_path,
172
- prompt if prompt else "Camera movement transformation",
173
- request
174
- )
175
- return video_path
176
- except Exception as e:
177
- raise gr.Error(f"Video generation failed: {e}")
178
-
179
 
180
  # --- UI ---
181
  css = '''#col-container { max-width: 800px; margin: 0 auto; }
@@ -245,9 +173,6 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
245
  with gr.Column():
246
  result = gr.Image(label="Output Image", interactive=False)
247
  prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
248
- create_video_button = gr.Button("🎥 Create Video Between Images", variant="secondary", visible=False)
249
- with gr.Group(visible=False) as video_group:
250
- video_output = gr.Video(label="Generated Video", show_download_button=True, autoplay=True)
251
 
252
  inputs = [
253
  image,rotate_deg, move_forward,
@@ -263,48 +188,11 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
263
  outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
264
  queue=False
265
  ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)
266
-
267
- # Manual generation with video button visibility control
268
- def infer_and_show_video_button(*args):
269
- result_img, result_seed, result_prompt = infer_camera_edit(*args)
270
- # Show video button if we have both input and output images
271
- show_button = args[0] is not None and result_img is not None
272
- return result_img, result_seed, result_prompt, gr.update(visible=show_button)
273
 
274
  run_event = run_btn.click(
275
- fn=infer_and_show_video_button,
276
  inputs=inputs,
277
- outputs=outputs + [create_video_button]
278
- )
279
-
280
- # Video creation
281
- create_video_button.click(
282
- fn=lambda: gr.update(visible=True),
283
- outputs=[video_group],
284
- api_name=False
285
- ).then(
286
- fn=create_video_between_images,
287
- inputs=[image, result, prompt_preview],
288
- outputs=[video_output],
289
- api_name=False
290
- )
291
-
292
- # Examples
293
- gr.Examples(
294
- examples=[
295
- ["tool_of_the_sea.png", 90, 0, 0, False, 0, True, 1.0, 4, 568, 1024],
296
- ["monkey.jpg", -90, 0, 0, False, 0, True, 1.0, 4, 704, 1024],
297
- ["metropolis.jpg", 0, 0, -1, False, 0, True, 1.0, 4, 816, 1024],
298
- ["disaster_girl.jpg", -45, 0, 1, False, 0, True, 1.0, 4, 768, 1024],
299
- ["grumpy.png", 90, 0, 1, False, 0, True, 1.0, 4, 576, 1024]
300
- ],
301
- inputs=[image,rotate_deg, move_forward,
302
- vertical_tilt, wideangle,
303
- seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width],
304
- outputs=outputs,
305
- fn=infer_camera_edit,
306
- cache_examples="lazy",
307
- elem_id="examples"
308
  )
309
 
310
  # Image upload triggers dimension update and control reset
@@ -330,10 +218,7 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
330
  if is_reset:
331
  return gr.update(), gr.update(), gr.update(), gr.update()
332
  else:
333
- result_img, result_seed, result_prompt = infer_camera_edit(*args)
334
- # Show video button if we have both input and output
335
- show_button = args[0] is not None and result_img is not None
336
- return result_img, result_seed, result_prompt, gr.update(visible=show_button)
337
 
338
  control_inputs = [
339
  image, rotate_deg, move_forward,
@@ -343,9 +228,9 @@ with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:
343
  control_inputs_with_flag = [is_reset] + control_inputs
344
 
345
  for control in [rotate_deg, move_forward, vertical_tilt]:
346
- control.release(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs + [create_video_button])
347
 
348
- wideangle.input(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs + [create_video_button])
349
 
350
  run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])
351
 
 
1
+ import math
 
2
  import random
3
+ import os
4
+ import tempfile
5
 
6
+ import numpy as np
7
+ import torch
8
  from PIL import Image
9
+ import gradio as gr
10
+ from gradio_client import Client, handle_file
11
+ import spaces
12
  from diffusers import FlowMatchEulerDiscreteScheduler
13
+ from huggingface_hub import hf_hub_download
14
+ from safetensors.torch import load_file
15
+
16
  from optimization import optimize_pipeline_
17
  from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
18
  from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
19
  from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
20
+ from prompt import build_camera_prompt
 
 
 
 
 
 
 
 
 
 
21
 
22
  # --- Model Loading ---
23
  dtype = torch.bfloat16
 
44
  pipe.unload_lora_weights()
45
 
46
 
 
47
  pipe.transformer.__class__ = QwenImageTransformer2DModel
48
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
49
 
 
52
 
53
  MAX_SEED = np.iinfo(np.int32).max
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  @spaces.GPU
57
  def infer_camera_edit(
 
104
 
105
  return result, seed, prompt
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  # --- UI ---
109
  css = '''#col-container { max-width: 800px; margin: 0 auto; }
 
173
  with gr.Column():
174
  result = gr.Image(label="Output Image", interactive=False)
175
  prompt_preview = gr.Textbox(label="Processed Prompt", interactive=False)
 
 
 
176
 
177
  inputs = [
178
  image,rotate_deg, move_forward,
 
188
  outputs=[rotate_deg, move_forward, vertical_tilt, wideangle, is_reset],
189
  queue=False
190
  ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False)
 
 
 
 
 
 
 
191
 
192
  run_event = run_btn.click(
193
+ fn=infer_camera_edit,
194
  inputs=inputs,
195
+ outputs=outputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  )
197
 
198
  # Image upload triggers dimension update and control reset
 
218
  if is_reset:
219
  return gr.update(), gr.update(), gr.update(), gr.update()
220
  else:
221
+ return infer_camera_edit(*args)
 
 
 
222
 
223
  control_inputs = [
224
  image, rotate_deg, move_forward,
 
228
  control_inputs_with_flag = [is_reset] + control_inputs
229
 
230
  for control in [rotate_deg, move_forward, vertical_tilt]:
231
+ control.release(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)
232
 
233
+ wideangle.input(fn=maybe_infer, inputs=control_inputs_with_flag, outputs=outputs)
234
 
235
  run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])
236
 
disaster_girl.jpg DELETED

Git LFS Details

  • SHA256: 21aa297dafc67ac89bff93255a026eba67b63023e921a8bf918e7b0e81c09eae
  • Pointer size: 131 Bytes
  • Size of remote file: 372 kB
grumpy.png DELETED

Git LFS Details

  • SHA256: 1344c764ac72d26bee7f8e76020ba81ba05df251a0122beea57a65ce85f6d05f
  • Pointer size: 131 Bytes
  • Size of remote file: 715 kB
metropolis.jpg DELETED
Binary file (56.9 kB)
 
monkey.jpg DELETED

Git LFS Details

  • SHA256: e49ad4d8e649dc6e8f38356dc7b3ea1de5a3c112b58a61ed321f6c107810a93d
  • Pointer size: 132 Bytes
  • Size of remote file: 3.33 MB
prompt.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def build_camera_prompt(rotate_deg, move_forward, vertical_tilt, wideangle):
    """Build the bilingual (Chinese + English) camera-edit prompt.

    Each control contributes at most one sentence, in this fixed order:
    rotation, forward move / close-up, vertical tilt, wide-angle lens.
    The sentences are joined with a single space and the result is
    stripped of surrounding whitespace.

    Args:
        rotate_deg: Signed rotation. Positive values produce a "to the left"
            sentence, negative values a "to the right" sentence, and 0
            produces nothing. The absolute value is embedded in the text.
        move_forward: Values above 5 request a close-up; values from 1 up to
            and including 5 request a forward move; smaller values add
            nothing.
        vertical_tilt: Values <= -1 request a bird's-eye view, values >= 1 a
            worm's-eye view; anything in between adds nothing.
        wideangle: Truthy to append the wide-angle lens sentence.

    Returns:
        The assembled prompt, or "no camera movement" when no control
        contributed a sentence.
    """
    sentences = []

    # Rotation: the sign selects the direction, the magnitude goes in the text.
    if rotate_deg > 0:
        degrees = abs(rotate_deg)
        sentences.append(
            f"将镜头向左旋转{degrees}度 Rotate the camera {degrees} degrees to the left."
        )
    elif rotate_deg != 0:
        degrees = abs(rotate_deg)
        sentences.append(
            f"将镜头向右旋转{degrees}度 Rotate the camera {degrees} degrees to the right."
        )

    # Forward movement: a large step becomes a close-up, a small one a dolly-in.
    if move_forward > 5:
        zoom_sentence = "将镜头转为特写镜头 Turn the camera to a close-up."
    elif move_forward >= 1:
        zoom_sentence = "将镜头向前移动 Move the camera forward."
    else:
        zoom_sentence = None
    if zoom_sentence is not None:
        sentences.append(zoom_sentence)

    # Vertical tilt: negative looks down from above, positive looks up from below.
    if vertical_tilt <= -1:
        tilt_sentence = "将相机转向鸟瞰视角 Turn the camera to a bird's-eye view."
    elif vertical_tilt >= 1:
        tilt_sentence = "将相机切换到仰视视角 Turn the camera to a worm's-eye view."
    else:
        tilt_sentence = None
    if tilt_sentence is not None:
        sentences.append(tilt_sentence)

    # Lens option. The leading space inside this literal is kept exactly as in
    # the original text, so a combined prompt has two spaces before it.
    if wideangle:
        sentences.append(" 将镜头转为广角镜头 Turn the camera to a wide-angle lens.")

    combined = " ".join(sentences).strip()
    return combined or "no camera movement"
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  git+https://github.com/huggingface/diffusers.git
2
 
3
-
4
-
5
  transformers
6
  accelerate
7
  safetensors
 
1
  git+https://github.com/huggingface/diffusers.git
2
 
3
+ huggingface-hub
4
+ spaces
5
  transformers
6
  accelerate
7
  safetensors
tool_of_the_sea.png DELETED

Git LFS Details

  • SHA256: 0fe3667f2e073b314158f58ad66c58dd9f31cea12496589d4acf576f923adf45
  • Pointer size: 131 Bytes
  • Size of remote file: 948 kB
wednesday.png DELETED

Git LFS Details

  • SHA256: 27193cae97c24a31ab574a21fc3c598627c28ab60edb0c60209acdb8071cf1ea
  • Pointer size: 132 Bytes
  • Size of remote file: 1.36 MB