vibethinker

Running on Zero

App Files Files Community

akhaliq HF Staff committed 30 days ago

Commit

be1c6d2

verified ·

1 Parent(s): 5e1305b

Update Gradio app with multiple files

Browse files

Files changed (2) hide show

app.py +27 -45
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,54 +1,34 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 import torch
-class VibeThinker:
-    def __init__(self, model_path):
-        self.model_path = model_path
-        self.model = AutoModelForCausalLM.from_pretrained(
-            self.model_path,
-            low_cpu_mem_usage=True,
-            torch_dtype=torch.bfloat16,
-            device_map="auto"
-        )
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
-    def infer_text(self, messages):
-        text = self.tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
-        generation_config = dict(
-            max_new_tokens=4096,
-            do_sample=True,
-            temperature=0.6,
-            top_p=0.95,
-            top_k=-1
-        )
-        generated_ids = self.model.generate(
-            **model_inputs,
-            generation_config=GenerationConfig(**generation_config)
-        )
-        generated_ids = [
-            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-        ]
-        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return response
-# Initialize the model
 print("Loading VibeThinker model...")
-vibe_model = VibeThinker('WeiboAI/VibeThinker-1.5B')
 print("Model loaded successfully!")
 def respond(message, history):
     """
     Generate response for the chatbot.
@@ -67,7 +47,7 @@ def respond(message, history):
     messages.append({"role": "user", "content": message})
     # Generate response
-    response = vibe_model.infer_text(messages)
     return response
@@ -93,7 +73,7 @@ with gr.Blocks(
         fn=respond,
         type="messages",
         title="",
-        description="Ask me anything! I'm powered by VibeThinker.",
         examples=[
             "What is the meaning of life?",
             "Explain quantum computing in simple terms",
@@ -111,6 +91,8 @@ with gr.Blocks(
         ### About VibeThinker
         VibeThinker is a 1.5B parameter conversational AI model designed for engaging and thoughtful conversations.
         The model uses temperature sampling (0.6) for balanced creativity and coherence.
         """
     )

 import gradio as gr
+from transformers import pipeline
 import torch
+import spaces
+# Initialize the pipeline
 print("Loading VibeThinker model...")
+pipe = pipeline(
+    "text-generation",
+    model="WeiboAI/VibeThinker-1.5B",
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
 print("Model loaded successfully!")
+@spaces.GPU
+def generate_response(messages):
+    """Generate response using the pipeline."""
+    response = pipe(
+        messages,
+        max_new_tokens=4096,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.95,
+        top_k=-1
+    )
+    return response[0]["generated_text"][-1]["content"]
 def respond(message, history):
     """
     Generate response for the chatbot.
     messages.append({"role": "user", "content": message})
     # Generate response
+    response = generate_response(messages)
     return response
         fn=respond,
         type="messages",
         title="",
+        description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
         examples=[
             "What is the meaning of life?",
             "Explain quantum computing in simple terms",
         ### About VibeThinker
         VibeThinker is a 1.5B parameter conversational AI model designed for engaging and thoughtful conversations.
         The model uses temperature sampling (0.6) for balanced creativity and coherence.
+        **Powered by ZeroGPU** for efficient GPU resource allocation.
         """
     )

requirements.txt CHANGED Viewed

@@ -5,3 +5,4 @@ accelerate
 bitsandbytes
 sentencepiece
 protobuf

 bitsandbytes
 sentencepiece
 protobuf
+spaces