Update app.py
Browse files
app.py
CHANGED
|
@@ -18,13 +18,13 @@ def generate(input_ids,
|
|
| 18 |
cfg_weight: float = 5,
|
| 19 |
image_token_num_per_image: int = 576,
|
| 20 |
patch_size: int = 16):
|
| 21 |
-
tokens = torch.zeros((parallel_size * 2, len(input_ids)), dtype=torch.int)
|
| 22 |
for i in range(parallel_size * 2):
|
| 23 |
tokens[i, :] = input_ids
|
| 24 |
if i % 2 != 0:
|
| 25 |
tokens[i, 1:-1] = processor.pad_id
|
| 26 |
inputs_embeds = model.language_model.get_input_embeddings()(tokens)
|
| 27 |
-
generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int)
|
| 28 |
|
| 29 |
pkv = None
|
| 30 |
for i in range(image_token_num_per_image):
|
|
@@ -89,8 +89,8 @@ with gr.Blocks() as demo:
|
|
| 89 |
with gr.Row():
|
| 90 |
with gr.Column():
|
| 91 |
prompt = gr.Textbox(label='Prompt', value='portrait, color, cinematic')
|
| 92 |
-
width = gr.Slider(
|
| 93 |
-
height = gr.Slider(
|
| 94 |
guidance = gr.Slider(1.0, 10.0, 5, step=0.1, label='Guidance')
|
| 95 |
seed = gr.Number(-1, precision=0, label='Seed (-1 for random)')
|
| 96 |
|
|
@@ -113,6 +113,7 @@ with gr.Blocks() as demo:
|
|
| 113 |
)
|
| 114 |
|
| 115 |
if __name__ == '__main__':
|
|
|
|
| 116 |
model_path = 'deepseek-ai/Janus-1.3B'
|
| 117 |
processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
|
| 118 |
tokenizer = processor.tokenizer
|
|
@@ -120,6 +121,8 @@ if __name__ == '__main__':
|
|
| 120 |
config = AutoConfig.from_pretrained(model_path)
|
| 121 |
config.language_config._attn_implementation = 'eager'
|
| 122 |
model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
demo.launch()
|
|
|
|
| 18 |
cfg_weight: float = 5,
|
| 19 |
image_token_num_per_image: int = 576,
|
| 20 |
patch_size: int = 16):
|
| 21 |
+
tokens = torch.zeros((parallel_size * 2, len(input_ids)), dtype=torch.int).to(cuda_device)
|
| 22 |
for i in range(parallel_size * 2):
|
| 23 |
tokens[i, :] = input_ids
|
| 24 |
if i % 2 != 0:
|
| 25 |
tokens[i, 1:-1] = processor.pad_id
|
| 26 |
inputs_embeds = model.language_model.get_input_embeddings()(tokens)
|
| 27 |
+
generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).to(cuda_device)
|
| 28 |
|
| 29 |
pkv = None
|
| 30 |
for i in range(image_token_num_per_image):
|
|
|
|
| 89 |
with gr.Row():
|
| 90 |
with gr.Column():
|
| 91 |
prompt = gr.Textbox(label='Prompt', value='portrait, color, cinematic')
|
| 92 |
+
width = gr.Slider(128, 1536, 128, step=16, label='Width')
|
| 93 |
+
height = gr.Slider(128, 1536, 128, step=16, label='Height')
|
| 94 |
guidance = gr.Slider(1.0, 10.0, 5, step=0.1, label='Guidance')
|
| 95 |
seed = gr.Number(-1, precision=0, label='Seed (-1 for random)')
|
| 96 |
|
|
|
|
| 113 |
)
|
| 114 |
|
| 115 |
if __name__ == '__main__':
|
| 116 |
+
cuda_device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 117 |
model_path = 'deepseek-ai/Janus-1.3B'
|
| 118 |
processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
|
| 119 |
tokenizer = processor.tokenizer
|
|
|
|
| 121 |
config = AutoConfig.from_pretrained(model_path)
|
| 122 |
config.language_config._attn_implementation = 'eager'
|
| 123 |
model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
|
| 124 |
+
if torch.cuda.is_available():
|
| 125 |
+
model = model.to(torch.bfloat16).cuda()
|
| 126 |
+
else:
|
| 127 |
+
model = model.to(torch.float16)
|
| 128 |
demo.launch()
|