Update app.py
app.py CHANGED
@@ -23,17 +23,33 @@ class ChatRequest(BaseModel):
     temperature: Optional[float] = 0.7
     max_tokens: Optional[int] = 256

+class GenerateRequest(BaseModel):
+    model: str
+    prompt: str
+    max_tokens: Optional[int] = 256
+    temperature: Optional[float] = 0.7
+
+
 class ModelInfo(BaseModel):
     id: str
     name: str
     description: str
+    format: str
+    path: str
+    size: int

-# Load your models info here or dynamically from disk/config
 AVAILABLE_MODELS = [
-    ModelInfo(
-
+    ModelInfo(
+        id="codellama-7b-instruct",
+        name="CodeLlama 7b Instruct",
+        description="CodeLlama 7b instruct model",
+        format="gguf",
+        path="/models/codellama-7b-instruct.gguf",
+        size=8000000000
+    )
 ]

+
 @app.on_event("startup")
 def load_model():
     global llm
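The hunk above stops at the first line of load_model, so the loading code itself is not part of this change. For orientation, a minimal sketch of what that startup hook could look like with llama-cpp-python; the Llama backend and the n_ctx value are assumptions, not something this diff shows:

# Sketch only: assumes llama-cpp-python and the GGUF path registered in AVAILABLE_MODELS.
from llama_cpp import Llama

llm = None

@app.on_event("startup")
def load_model():
    global llm
    # Hypothetical: load the single registered model; n_ctx=2048 is an illustrative choice.
    llm = Llama(model_path=AVAILABLE_MODELS[0].path, n_ctx=2048)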
@@ -77,6 +93,10 @@ async def list_models():
     # Return available models info
     return [model.dict() for model in AVAILABLE_MODELS]

+@app.get("/api/models")
+async def api_models():
+    return {"models": [model.dict() for model in AVAILABLE_MODELS]}
+
 @app.get("/models/{model_id}")
 async def get_model(model_id: str):
     for model in AVAILABLE_MODELS:
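A quick way to exercise the new listing endpoint once the Space is up; the base URL below is a placeholder for wherever the app is actually served:

# Hypothetical client call; replace the base URL with the real Space URL.
import requests

resp = requests.get("http://localhost:7860/api/models")
resp.raise_for_status()
for m in resp.json()["models"]:
    print(m["id"], m["format"], m["path"])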
@@ -118,4 +138,33 @@ async def chat(req: ChatRequest):
             }
         ]
     }
-    return response
+    return response
+
+@app.post("/api/generate")
+async def api_generate(req: GenerateRequest):
+    global llm
+    if llm is None:
+        raise HTTPException(status_code=503, detail="Model not initialized")
+
+    if req.model not in [m.id for m in AVAILABLE_MODELS]:
+        raise HTTPException(status_code=400, detail="Unsupported model")
+
+    output = llm(
+        req.prompt,
+        max_tokens=req.max_tokens,
+        temperature=req.temperature,
+        stop=["\n\n"]  # Or any stop sequence you want
+    )
+    text = output.get("choices", [{}])[0].get("text", "").strip()
+
+    return {
+        "id": str(uuid.uuid4()),
+        "model": req.model,
+        "choices": [
+            {
+                "text": text,
+                "index": 0,
+                "finish_reason": "stop"
+            }
+        ]
+    }
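And a matching sketch for the new completion endpoint; the model id comes from AVAILABLE_MODELS, while the URL, prompt, and sampling values are examples only:

# Hypothetical client call; URL and prompt are placeholders.
import requests

payload = {
    "model": "codellama-7b-instruct",
    "prompt": "Write a Python function that reverses a string.",
    "max_tokens": 128,
    "temperature": 0.2,
}
resp = requests.post("http://localhost:7860/api/generate", json=payload)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])

Per the handler above, an unknown model id comes back as a 400 and a request made before the model has loaded as a 503.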