Spaces:
Lap-AI
/
Runtime error

File size: 1,644 Bytes
6e0397b
 
3003014
6e0397b
 
 
 
c986dfe
6e0397b
 
 
3003014
 
 
3003e89
1e49580
3003014
6e0397b
3003014
 
6e0397b
c950d42
6e0397b
731d6b6
 
 
 
 
 
 
 
 
 
6e0397b
 
731d6b6
6e0397b
 
 
 
0d7c9ed
6e0397b
 
 
 
 
 
 
 
3003014
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from transformers import pipeline, TextStreamer

class ModelInput(BaseModel):
    """Request body accepted by the ``/generate`` endpoint."""

    # Text the model is asked to continue.
    prompt: str
    # Upper bound on the number of newly generated tokens. Constrained to be
    # positive so a zero/negative budget is rejected during request
    # validation instead of failing later inside generation.
    max_new_tokens: int = Field(default=4096, gt=0)

# Application instance; the route decorators further down register their
# handlers on this object.
app = FastAPI()

# Initialize text generation pipeline
# NOTE: this is a module-level statement, so the pipeline is constructed once
# when the module is imported and then shared by every request.
# NOTE(review): presumably this loads (and on first run downloads) the model
# weights, which would make import slow -- confirm.
generator = pipeline(
    "text-generation",
    model="HuggingFaceTB/SmolLM2-360M-Instruct",
    device="cpu"  # Use CPU (change to device=0 for GPU)
)

# Create text streamer
# A single streamer built from the pipeline's tokenizer; generate_response
# passes it to every generation call. skip_prompt=True asks it to leave the
# prompt out of what it emits.
# NOTE(review): per the transformers docs TextStreamer prints tokens to
# stdout, so this would stream to the server log, not to the HTTP client --
# confirm that is intended.
streamer = TextStreamer(generator.tokenizer, skip_prompt=True)

def generate_response(prompt: str, max_new_tokens: int = 4096) -> str:
    """Run the text-generation pipeline on *prompt* and return the completion.

    Args:
        prompt: Plain-text prompt, passed to the pipeline as a single string
            (not as a chat message list).
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The generated text. If the pipeline output starts with the prompt,
        the prompt is cut off and the remainder is whitespace-stripped;
        otherwise the pipeline text is returned unchanged.

    Raises:
        ValueError: If the pipeline call or the handling of its output fails.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        # do_sample=False requests deterministic (non-sampling) decoding; the
        # module-level streamer is handed to the pipeline on every call.
        output = generator(
            prompt,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            streamer=streamer,
        )

        # The pipeline returns a list of results; the text of the first one
        # is stored under "generated_text".
        full_text = output[0]["generated_text"]

        # The returned text may echo the prompt at the start; drop that echo
        # so only the continuation is returned.
        if full_text.startswith(prompt):
            return full_text[len(prompt):].strip()
        return full_text
    except Exception as e:
        # Keep the single ValueError contract for callers, but chain the
        # original exception so its traceback is not lost.
        raise ValueError(f"Error generating response: {e}") from e
        
@app.post("/generate")
async def generate_text(input: ModelInput):
    """Generate text for the prompt supplied in the request body.

    The user prompt is prefixed with a fixed system instruction and the
    combined text is sent to ``generate_response`` as one plain string.

    Args:
        input: Request body carrying ``prompt`` and ``max_new_tokens``.

    Returns:
        A dict of the form ``{"generated_text": <model output>}``.

    Raises:
        HTTPException: Status 500 with the error text as ``detail`` when
            generation fails.
    """
    # generate_response expects a single string, so the system instruction
    # and the user prompt are joined into one string rather than passed as a
    # tuple.
    system_prompt = "You are a helpful assistant."
    full_prompt = f"{system_prompt}\n\n{input.prompt}"

    try:
        # NOTE(review): generate_response is a synchronous call made from an
        # async handler, so the event loop is occupied until it returns.
        response = generate_response(
            prompt=full_prompt,
            max_new_tokens=input.max_new_tokens,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"generated_text": response}

# Landing route: answers GET / with a fixed greeting payload.
@app.get("/")
async def root():
    greeting = {"message": "Welcome to the Streaming Model API!"}
    return greeting