akhaliq (HF Staff) committed
Commit be1c6d2 · verified · 1 Parent(s): 5e1305b

Update Gradio app with multiple files

Files changed (2)
  1. app.py +27 -45
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,54 +1,34 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+from transformers import pipeline
 import torch
+import spaces
 
 
-class VibeThinker:
-    def __init__(self, model_path):
-        self.model_path = model_path
-        self.model = AutoModelForCausalLM.from_pretrained(
-            self.model_path,
-            low_cpu_mem_usage=True,
-            torch_dtype=torch.bfloat16,
-            device_map="auto"
-        )
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
-
-    def infer_text(self, messages):
-        text = self.tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
-
-        generation_config = dict(
-            max_new_tokens=4096,
-            do_sample=True,
-            temperature=0.6,
-            top_p=0.95,
-            top_k=-1
-        )
-
-        generated_ids = self.model.generate(
-            **model_inputs,
-            generation_config=GenerationConfig(**generation_config)
-        )
-
-        generated_ids = [
-            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-        ]
-
-        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return response
-
-
-# Initialize the model
+# Initialize the pipeline
 print("Loading VibeThinker model...")
-vibe_model = VibeThinker('WeiboAI/VibeThinker-1.5B')
+pipe = pipeline(
+    "text-generation",
+    model="WeiboAI/VibeThinker-1.5B",
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
 print("Model loaded successfully!")
 
 
+@spaces.GPU
+def generate_response(messages):
+    """Generate response using the pipeline."""
+    response = pipe(
+        messages,
+        max_new_tokens=4096,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.95,
+        top_k=-1
+    )
+    return response[0]["generated_text"][-1]["content"]
+
+
 def respond(message, history):
     """
     Generate response for the chatbot.
@@ -67,7 +47,7 @@ def respond(message, history):
     messages.append({"role": "user", "content": message})
 
     # Generate response
-    response = vibe_model.infer_text(messages)
+    response = generate_response(messages)
 
     return response
 
@@ -93,7 +73,7 @@ with gr.Blocks(
     fn=respond,
     type="messages",
     title="",
-    description="Ask me anything! I'm powered by VibeThinker.",
+    description="Ask me anything! I'm powered by VibeThinker with ZeroGPU acceleration.",
     examples=[
         "What is the meaning of life?",
         "Explain quantum computing in simple terms",
@@ -111,6 +91,8 @@ with gr.Blocks(
     ### About VibeThinker
     VibeThinker is a 1.5B parameter conversational AI model designed for engaging and thoughtful conversations.
    The model uses temperature sampling (0.6) for balanced creativity and coherence.
+
+    **Powered by ZeroGPU** for efficient GPU resource allocation.
     """
 )

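For context on the new code path: a transformers text-generation pipeline accepts a list of role/content chat messages directly, applies the model's chat template, and returns the full conversation with the assistant's reply appended, which is why the app reads response[0]["generated_text"][-1]["content"]. A minimal standalone sketch of that flow (the prompt is illustrative; top_k=-1 is left out here because it is a vLLM-style "disable top-k" value that transformers builds may reject):

import torch
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="WeiboAI/VibeThinker-1.5B",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Chat-format input: the pipeline applies the model's chat template itself.
messages = [{"role": "user", "content": "What is 17 * 24?"}]
out = pipe(messages, max_new_tokens=256, do_sample=True, temperature=0.6, top_p=0.95)

# "generated_text" holds the whole conversation; the assistant turn comes last.
print(out[0]["generated_text"][-1]["content"])
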
requirements.txt CHANGED
@@ -5,3 +5,4 @@ accelerate
 bitsandbytes
 sentencepiece
 protobuf
+spaces
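
The new spaces dependency is what makes the @spaces.GPU decorator in app.py work: on a ZeroGPU Space, hardware is allocated when a decorated function is called and released when it returns, rather than holding a GPU for the app's lifetime. A minimal sketch of the pattern, assuming the Hugging Face Spaces runtime (the function body is only illustrative):

import spaces  # Hugging Face Spaces SDK; only meaningful inside a Space
import torch

@spaces.GPU  # on ZeroGPU, a GPU is attached for the duration of this call
def describe_device():
    # CUDA should be visible here even though the process started on CPU
    return torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

print(describe_device())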