# vibethinker / app.py
# Author: akhaliq (HF Staff)
# Last commit: "Update Gradio app with multiple files" (f96aa87, verified)
import gradio as gr
from transformers import pipeline
import torch
import spaces
# Initialize the text-generation pipeline once at import time so the model
# weights are loaded before the first request. `pipe` is the module-level
# handle used by `respond` below.
print("Loading VibeThinker model...")
pipe = pipeline(
    "text-generation",
    model="WeiboAI/VibeThinker-1.5B",
    torch_dtype=torch.bfloat16,  # halves memory vs fp32; bf16 avoids fp16 overflow issues
    device_map="auto"  # let accelerate place the model on available GPU(s)/CPU
)
print("Model loaded successfully!")
@spaces.GPU
def respond(message, history):
    """Generate a (pseudo-)streaming response for the chatbot.

    Args:
        message: The user's current message as a string.
        history: Previous conversation turns. Because the ChatInterface is
            configured with ``type="messages"``, each entry is a dict of the
            form ``{"role": ..., "content": ...}`` (possibly with extra keys
            such as ``metadata`` that the model must not see).

    Yields:
        The assistant response accumulated one character at a time, so the
        Gradio UI renders it as a stream.
    """
    # Bug fix: history arrives in OpenAI-style "messages" format (list of
    # dicts), not [user, assistant] pairs — unpacking each dict as a pair
    # would yield its keys and corrupt the prompt. Copy only the fields the
    # chat template understands.
    messages = [
        {"role": turn["role"], "content": turn["content"]} for turn in history
    ]
    messages.append({"role": "user", "content": message})

    # Bug fix: the pipeline call returns a list like
    # [{"generated_text": ...}]; the original iterated that list and then
    # indexed each dict with [0], raising KeyError. Generate once, then
    # stream the finished text to the UI below.
    result = pipe(
        messages,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        return_full_text=False,  # only the newly generated assistant text
    )
    generated_text = result[0]["generated_text"]

    # Some pipeline versions return the full chat as a list of message
    # dicts; in that case the assistant's reply is the last entry.
    if isinstance(generated_text, list):
        assistant_response = generated_text[-1]["content"]
    else:
        assistant_response = generated_text

    # Pseudo-stream: yield ever-growing prefixes so the frontend animates
    # the reply even though generation already finished.
    full_response = ""
    for char in assistant_response:
        full_response += char
        yield full_response
# Create the Gradio interface. `demo` is launched by the __main__ guard below.
with gr.Blocks(
    theme=gr.themes.Soft(),
    # Custom CSS: render the attribution link in the header without the
    # default underline, restoring it on hover.
    css="""
.header-link { text-decoration: none; color: inherit; }
.header-link:hover { text-decoration: underline; }
"""
) as demo:
    # Page header with model link and attribution.
    gr.Markdown(
        """
# 💭 VibeThinker Chatbot
Chat with [WeiboAI/VibeThinker-1.5B](https://huggingface.co/WeiboAI/VibeThinker-1.5B) - a powerful conversational AI model.
<a href="https://huggingface.co/spaces/akhaliq/anycoder" class="header-link">Built with anycoder</a>
"""
    )
    # Main chat widget. type="messages" means `respond` receives history as
    # a list of {"role", "content"} dicts (OpenAI-style), not [user, bot] pairs.
    chatbot = gr.ChatInterface(
        fn=respond,
        type="messages",
        title="",
        description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
        examples=[
            "What is the meaning of life?",
            "Explain quantum computing in simple terms",
            "Write a short poem about artificial intelligence",
            "How can I improve my productivity?",
        ],
        # Examples run the model on GPU; don't pre-generate cached answers.
        cache_examples=False,
    )
    # Footer blurb about the model and hosting.
    gr.Markdown(
        """
### About VibeThinker
VibeThinker is a 1.5B parameter conversational AI model designed for engaging and thoughtful conversations.
The model uses temperature sampling (0.6) for balanced creativity and coherence.
**Powered by ZeroGPU** for efficient GPU resource allocation.
"""
    )
# Start the Gradio server only when run as a script (not when imported,
# e.g. by the Spaces runtime or tests).
if __name__ == "__main__":
    demo.launch()