Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
+
import spaces #[uncomment to use ZeroGPU]
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 8 |
+
from transformers.generation import GenerationConfig
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Device the model is moved to after loading.
device = "cuda"
# Weights are loaded in bfloat16.
torch_dtype = torch.bfloat16
# Identifier handed to `from_pretrained` for both the tokenizer and the model.
model_name_or_path = "X-Omni/X-Omni-Zh"
# Identifier handed to `model.init_vision` below.
flux_model_name_or_path = "zhangxiaosong18/FLUX.1-dev-VAE"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
# trust_remote_code=True lets transformers run modeling code shipped with the
# checkpoint; `init_vision` / `set_generation_mode` are presumably defined
# there (they are not standard transformers methods) -- TODO confirm.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
)
# Use the `device` constant rather than a hard-coded `.cuda()` so the target
# device is configured in exactly one place.
model = model.to(device)
model.init_vision(flux_model_name_or_path)
model.set_generation_mode('image')
model.eval()
|
| 25 |
+
|
| 26 |
+
@spaces.GPU(duration=220)  # GPU decorator from the `spaces` package (ZeroGPU)
def generate_image(
    image_prompt,
    image_size,
    top_p,
    min_p,
    seed,
):
    """Generate one image from a text prompt using the module-level model.

    Args:
        image_prompt: Prompt text; leading/trailing whitespace is stripped.
        image_size: Size string whose integer components are separated by
            ``'x'`` (e.g. ``'1152x768'``). The first and second components
            are floor-divided by 16 to give ``token_h`` and ``token_w``.
        top_p: Forwarded to ``GenerationConfig(top_p=...)``.
        min_p: Forwarded to ``GenerationConfig(min_p=...)``.
        seed: Passed to ``torch.manual_seed`` before generation and again
            before decoding.

    Returns:
        The first element of the image list returned by ``model.mmdecode``.
    """
    prompt_text = image_prompt.strip()

    # Parse "<a>x<b>" into integers, then derive the token grid (16 px per token side).
    dims = tuple(int(part) for part in image_size.split('x'))
    token_h = dims[0] // 16
    token_w = dims[1] // 16
    grid_prefix = f'<SOM>{token_h} {token_w}<IMAGE>'

    # Sampling settings; the new-token budget equals the token grid area.
    sampling_options = dict(
        max_new_tokens=token_h * token_w,
        do_sample=True,
        temperature=1.0,
        min_p=min_p,
        top_p=top_p,
        guidance_scale=1.0,
        suppress_tokens=tokenizer.convert_tokens_to_ids(model.config.mm_special_tokens),
    )
    sampling_config = GenerationConfig(**sampling_options)

    # Tokenize the prompt followed by the image prefix as a batch of one.
    encoded = tokenizer(
        [prompt_text + grid_prefix],
        return_tensors='pt',
        padding='longest',
        padding_side='left',
    )
    prompt_ids = encoded.input_ids.cuda()
    prompt_mask = encoded.attention_mask.cuda()

    torch.manual_seed(seed)
    generated = model.generate(
        inputs=prompt_ids,
        attention_mask=prompt_mask,
        generation_config=sampling_config,
    )

    # Re-seed before decoding so that step starts from the same RNG state
    # (presumably the decoder draws random numbers too -- TODO confirm).
    torch.manual_seed(seed)
    _, decoded_images = model.mmdecode(tokenizer, generated[0], skip_special_tokens=False)

    return decoded_images[0]
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# Example prompts (Chinese) offered in the demo's Examples panel.
_example_prompts = (
    '''
生成一张雪中的紫禁城全景封面图,作为北京冬季旅游指南的主题。画面以近景构图展现建筑,红墙金瓦被皑皑白雪覆盖,朱红色宫墙,金黄色瓦片与洁白雪色形成强烈对比,琉璃瓦顶的积雪在阳光下折射出晶莹光泽。前景一枝腊梅花正在盛开,背景为灰蓝色冬日天空,飘落细雪,远处角楼轮廓若隐若现,增添朦胧诗意感。图片上有标题“雪落北平·穿越600年”,另有副标题“北京古建筑雪景深度游”。文字艺术感极强,与图片良好融合起来
'''.strip(),
    '''
画面的中心摆放着一个复古花瓶,瓶身主体为浓郁的蓝色,这种蓝色深邃而典雅,仿佛带着岁月的沉淀。花瓶设计极具复古风格,瓶颈处环绕着细致的金色雕花,宛如华丽的项链点缀其上;瓶身绘制着精美的花卉图案,笔触细腻,色彩过渡自然,展现出极高的工艺水准,整体彰显出优雅的古典韵味。花瓶放置在深色木质的圆桌上,旁边搭配了一束新鲜绽放的百合花,为画面增添了几分生机与活力。背景是一幅淡蓝色的壁纸,上面有着若隐若现的花纹,营造出一种静谧而温馨的氛围。图片中的文字信息十分醒目。“家居美学盛典”位于顶部中央,字体较大,在视觉上十分突出,吸引观众的目光;左下角写着“下单直降 100”,下方紧跟数字“399”,强调了价格优惠;右下角有“限量抢购 速来咨询”的提示,引导观众进一步咨询;最底部中央,“前 50 名买一送一”的字样突出促销活动的紧迫性和吸引力。这些文字信息通过巧妙的颜色、大小和背景设计,在空间布局上层次分明,重点突出,有效地引导观众关注促销信息和价格优势。
'''.strip(),
)

# One row per prompt: [prompt, image size, top_p, min_p, seed].
examples = [
    [prompt_text, '1152x1152', 1.0, 0.03, 0]
    for prompt_text in _example_prompts
]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
css = """
|
| 81 |
+
.app {
|
| 82 |
+
max-width: 800px !important;
|
| 83 |
+
margin: 0 auto !important;
|
| 84 |
+
}
|
| 85 |
+
"""
|
| 86 |
+
|
| 87 |
+
# Gradio UI definition: header HTML, prompt textbox and output image, a
# Generate button, an "Advanced Settings" accordion (image size, top-p, min-p,
# seed), clickable examples, and the click handler wiring to generate_image.
# NOTE(review): the indentation of this file was lost in extraction; the
# nesting of components inside each `with` block below is reconstructed and
# should be confirmed against the deployed layout.
with gr.Blocks(css=css) as demo:
    # NOTE(review): the "Paper" link uses the same URL as "Project Page" --
    # confirm whether a dedicated paper URL was intended.
    gr.HTML('''
<h1 style="text-align:center">🎨X-Omni: Reinforcement Learning Makes Discrete Autoregressive Image Generative Models Great Again</h1>
<h3 style="text-align:center">Model: <a href="https://huggingface.co/X-Omni/X-Omni-Zh">X-Omni-Zh</a> (support Chinese text rendering)</h3>
<p align="center">
<a href="https://x-omni-team.github.io">🏠 Project Page</a> |
<a href="https://x-omni-team.github.io">📄 Paper</a> |
<a href="https://github.com/X-Omni-Team/X-Omni">💻 Code</a> |
<a href="https://huggingface.co/collections/X-Omni/x-omni-models-6888aadcc54baad7997d7982">🤗 HuggingFace Model</a>
</p>
'''.strip())
    # Prompt input and generated-image output.
    with gr.Row():
        textbox = gr.Textbox(lines=2, placeholder='text prompt for image generation', show_label=False)
        image = gr.Image(show_label=False, type='pil')
    with gr.Row():
        button = gr.Button("Generate", variant="primary")
    # Sampling controls; collapsed by default (open=False).
    with gr.Accordion("Advanced Settings", open=False):
        image_size = gr.Dropdown(label="Image Size", choices=["1152x1152", "1152x768", "768x1152"], value="1152x1152")
        top_p = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=1.0, step=0.01)
        min_p = gr.Slider(label="Min P", minimum=0.0, maximum=1.0, value=0.03, step=0.01)
        # precision=0 restricts the seed field to integers.
        seed_input = gr.Number(label="Seed", value=0, precision=0)
    # Examples are not cached (cache_examples=False); run_on_click=True is set
    # so that selecting an example invokes generate_image.
    with gr.Row():
        gr.Examples(examples=examples, inputs=(textbox, image_size, top_p, min_p, seed_input), outputs=image, fn=generate_image, cache_examples=False, run_on_click=True)
    # The input order here matches generate_image's positional parameters.
    button.click(
        generate_image,
        inputs=(textbox, image_size, top_p, min_p, seed_input),
        outputs=image,
    )
|
| 115 |
+
|
| 116 |
+
# Launch the Gradio app only when this file is executed as a script;
# ssr_mode=False is passed explicitly to `launch`.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)
|