Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -5,6 +5,7 @@ import threading
 import queue
 import time
 import spaces
+
 # Model configuration
 model_name = "HelpingAI/Dhanishtha-2.0-preview"
 
@@ -31,10 +32,12 @@ def load_model():
 
 class GradioTextStreamer(TextStreamer):
     """Custom TextStreamer for Gradio integration"""
-    def __init__(self, tokenizer, skip_prompt=True
-
+    def __init__(self, tokenizer, skip_prompt=True):
+        # TextStreamer only accepts tokenizer and skip_prompt parameters
+        super().__init__(tokenizer, skip_prompt)
         self.text_queue = queue.Queue()
         self.generated_text = ""
+        self.skip_special_tokens = True  # Handle this manually if needed
 
     def on_finalized_text(self, text: str, stream_end: bool = False):
         """Called when text is finalized"""
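For context, this is what the fixed class plausibly looks like end to end. Only the signatures, the attribute setup, and the queue-draining loop are visible in the diff; the body of `on_finalized_text` and the `None` end-of-stream sentinel are assumptions filled in as a minimal sketch:

```python
import queue

from transformers import TextStreamer


class GradioTextStreamer(TextStreamer):
    """Custom TextStreamer for Gradio integration."""

    def __init__(self, tokenizer, skip_prompt=True):
        # TextStreamer only accepts tokenizer and skip_prompt parameters
        super().__init__(tokenizer, skip_prompt)
        self.text_queue = queue.Queue()
        self.generated_text = ""
        self.skip_special_tokens = True  # handled manually if needed

    def on_finalized_text(self, text: str, stream_end: bool = False):
        """Assumed body: accumulate the chunk and hand it to the consumer."""
        self.generated_text += text
        self.text_queue.put(text)
        if stream_end:
            self.text_queue.put(None)  # assumed end-of-stream sentinel

    def reset(self):
        """Clear state and drain chunks left over from a previous run."""
        self.generated_text = ""
        while True:
            try:
                self.text_queue.get_nowait()
            except queue.Empty:
                break
```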
@@ -56,6 +59,7 @@ class GradioTextStreamer(TextStreamer):
                 self.text_queue.get_nowait()
             except queue.Empty:
                 break
+
 @spaces.GPU()
 def generate_response(message, history, max_tokens, temperature, top_p):
     """Generate streaming response"""
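The `@spaces.GPU()` decorator is what ties this to the Space's ZeroGPU hardware: a GPU is allocated only for the duration of each decorated call. A minimal sketch of the pattern; the `duration` argument is optional and the value here is an assumption:

```python
import spaces


@spaces.GPU(duration=120)  # seconds to hold the GPU; optional, value assumed
def generate_response(message, history, max_tokens, temperature, top_p):
    # CUDA is only available inside this call on ZeroGPU hardware
    ...
```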
@@ -86,7 +90,7 @@ def generate_response(message, history, max_tokens, temperature, top_p):
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
     # Create and setup streamer
-    streamer = GradioTextStreamer(tokenizer, skip_prompt=True
+    streamer = GradioTextStreamer(tokenizer, skip_prompt=True)
     streamer.reset()
 
     # Start generation in a separate thread
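The last hunk ends at the comment about starting generation in a separate thread. With a queue-backed streamer, the usual continuation looks roughly like the sketch below; everything past the visible `# Start generation in a separate thread` comment, including the generation kwargs and the `None` sentinel from the streamer sketch above, is an assumption:

```python
import threading

# Assumed continuation of generate_response()
generation_kwargs = dict(
    **model_inputs,
    max_new_tokens=max_tokens,
    do_sample=True,
    temperature=temperature,
    top_p=top_p,
    streamer=streamer,
)
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

# Yield the accumulated text as chunks arrive from on_finalized_text()
partial = ""
while True:
    chunk = streamer.text_queue.get()
    if chunk is None:  # end-of-stream sentinel (assumed)
        break
    partial += chunk
    yield partial
thread.join()
```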