Diamanta commited on
Commit
e1b187e
·
verified ·
1 Parent(s): 9dcecfa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -4
app.py CHANGED
@@ -23,17 +23,33 @@ class ChatRequest(BaseModel):
23
  temperature: Optional[float] = 0.7
24
  max_tokens: Optional[int] = 256
25
 
 
 
 
 
 
 
 
26
  class ModelInfo(BaseModel):
27
  id: str
28
  name: str
29
  description: str
 
 
 
30
 
31
- # Load your models info here or dynamically from disk/config
32
  AVAILABLE_MODELS = [
33
- ModelInfo(id="llama2", name="Llama 2", description="Meta Llama 2 model"),
34
- # Add more models if you want
 
 
 
 
 
 
35
  ]
36
 
 
37
  @app.on_event("startup")
38
  def load_model():
39
  global llm
@@ -77,6 +93,10 @@ async def list_models():
77
  # Return available models info
78
  return [model.dict() for model in AVAILABLE_MODELS]
79
 
 
 
 
 
80
  @app.get("/models/{model_id}")
81
  async def get_model(model_id: str):
82
  for model in AVAILABLE_MODELS:
@@ -118,4 +138,33 @@ async def chat(req: ChatRequest):
118
  }
119
  ]
120
  }
121
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  temperature: Optional[float] = 0.7
24
  max_tokens: Optional[int] = 256
25
 
26
class GenerateRequest(BaseModel):
    """Request payload for POST /api/generate (Ollama-style completion)."""

    model: str  # must match an id in AVAILABLE_MODELS; validated by api_generate
    prompt: str  # raw prompt text forwarded verbatim to the llm
    max_tokens: Optional[int] = 256  # generation cap forwarded to the llm
    temperature: Optional[float] = 0.7  # sampling temperature forwarded to the llm
31
+
32
+
33
class ModelInfo(BaseModel):
    """Static metadata describing one model advertised by this server."""

    id: str  # stable identifier clients send as the "model" field
    name: str  # human-readable display name
    description: str
    format: str  # on-disk weights format, e.g. "gguf"
    path: str  # filesystem location of the model file
    size: int  # model size — presumably bytes (8000000000 for the 7B gguf); confirm
40
 
 
41
# Catalog of models this server advertises. Both /api/models and
# /api/generate validate requested model ids against this list.
AVAILABLE_MODELS = [
    ModelInfo(
        id="codellama-7b-instruct",
        name="CodeLlama 7b Instruct",
        description="CodeLlama 7b instruct model",
        format="gguf",
        path="/models/codellama-7b-instruct.gguf",
        # presumably bytes (~8 GB) — TODO confirm unit
        size=8000000000
    )
]
51
 
52
+
53
  @app.on_event("startup")
54
  def load_model():
55
  global llm
 
93
  # Return available models info
94
  return [model.dict() for model in AVAILABLE_MODELS]
95
 
96
+ @app.get("/api/models")
97
+ async def api_models():
98
+ return {"models": [model.dict() for model in AVAILABLE_MODELS]}
99
+
100
  @app.get("/models/{model_id}")
101
  async def get_model(model_id: str):
102
  for model in AVAILABLE_MODELS:
 
138
  }
139
  ]
140
  }
141
+ return response
142
+
143
+ @app.post("/api/generate")
144
+ async def api_generate(req: GenerateRequest):
145
+ global llm
146
+ if llm is None:
147
+ raise HTTPException(status_code=503, detail="Model not initialized")
148
+
149
+ if req.model not in [m.id for m in AVAILABLE_MODELS]:
150
+ raise HTTPException(status_code=400, detail="Unsupported model")
151
+
152
+ output = llm(
153
+ req.prompt,
154
+ max_tokens=req.max_tokens,
155
+ temperature=req.temperature,
156
+ stop=["\n\n"] # Or any stop sequence you want
157
+ )
158
+ text = output.get("choices", [{}])[0].get("text", "").strip()
159
+
160
+ return {
161
+ "id": str(uuid.uuid4()),
162
+ "model": req.model,
163
+ "choices": [
164
+ {
165
+ "text": text,
166
+ "index": 0,
167
+ "finish_reason": "stop"
168
+ }
169
+ ]
170
+ }