Spaces:

Mungert
/

GradLLM

Running

App Files Files Community

johnbridges commited on Aug 13

Commit

8280e1d

1 Parent(s): 7630510

.

Browse files

Files changed (2) hide show

app.py +60 -36
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 # app.py
 import asyncio
 import gradio as gr
 from fastapi import FastAPI
-from contextlib import asynccontextmanager
 from config import settings
 from rabbit_base import RabbitBase
@@ -11,34 +12,50 @@ from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
-# --- Optional ZeroGPU hook ---
-# If your Space uses ZeroGPU hardware, this satisfies the startup check.
-# If you're on CPU hardware, this is harmless.
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
 except Exception:
-    spaces = None
     ZERO_GPU_AVAILABLE = False
-# --- Runner factory (stub) ---
 class EchoRunner(ILLMRunner):
     Type = "EchoRunner"
-    async def StartProcess(self, llmServiceObj: dict): pass
-    async def RemoveProcess(self, sessionId: str): pass
-    async def StopRequest(self, sessionId: str): pass
-    async def SendInputAndGetResponse(self, llmServiceObj: dict): pass
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
-# --- Publisher and Service ---
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
-# --- Handlers mapping .NET FuncName -> service method ---
 async def h_start(data):  await service.StartProcess(data or {})
 async def h_user(data):   await service.UserInput(data or {})
 async def h_remove(data): await service.RemoveSession(data or {})
@@ -57,11 +74,11 @@ handlers = {
     "getFunctionRegistryFiltered": h_getreg_f,
 }
-# --- Listener wiring (needs base + instance_name) ---
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
-    instance_name=settings.RABBIT_INSTANCE_NAME,   # queue prefix like your .NET instance
     handlers=handlers,
 )
@@ -72,38 +89,38 @@ DECLS = [
     {"ExchangeName": f"llmUserInput{settings.SERVICE_ID}", "FuncName": "llmUserInput",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"llmRemoveSession{settings.SERVICE_ID}", "FuncName": "llmRemoveSession",
-      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"llmStopRequest{settings.SERVICE_ID}", "FuncName": "llmStopRequest",
-      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"queryIndexResult{settings.SERVICE_ID}", "FuncName": "queryIndexResult",
-      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"getFunctionRegistry{settings.SERVICE_ID}", "FuncName": "getFunctionRegistry",
-      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"getFunctionRegistryFiltered{settings.SERVICE_ID}", "FuncName": "getFunctionRegistryFiltered",
-      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
-# --- ZeroGPU detection function (no-op) ---
-# This only exists so HF Spaces sees that you "have" a GPU entrypoint on ZeroGPU.
-if ZERO_GPU_AVAILABLE:
-    @spaces.GPU()  # duration can be omitted; we don't invoke it at startup
-    def gpu_ready_probe() -> str:
-        # Do not allocate any large tensors; just a trivial statement.
-        # Presence of this function is enough for the ZeroGPU startup check.
-        return "gpu-probe-ok"
-# --- Gradio UI (for smoke test) ---
 async def ping():
     return "ok"
 with gr.Blocks() as demo:
-    gr.Markdown("### LLM Runner (Python) listening on RabbitMQ")
-    btn = gr.Button("Ping")
-    out = gr.Textbox()
     btn.click(ping, inputs=None, outputs=out)
-# --- FastAPI app with lifespan (replaces deprecated @on_event) ---
 @asynccontextmanager
 async def lifespan(_app: FastAPI):
     # startup
@@ -111,9 +128,10 @@ async def lifespan(_app: FastAPI):
     await service.init()
     await listener.start(DECLS)
     yield
-    # shutdown (optional cleanup)
-    # await publisher.close()  # if your RabbitRepo exposes this
-    # await listener.stop()    # if you implement stop()
 app = FastAPI(lifespan=lifespan)
 app = gr.mount_gradio_app(app, demo, path="/")
@@ -122,6 +140,12 @@ app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 # app.py
 import asyncio
+from contextlib import asynccontextmanager
 import gradio as gr
 from fastapi import FastAPI
 from config import settings
 from rabbit_base import RabbitBase
 from service import LLMService
 from runners.base import ILLMRunner
+# ---------------- ZeroGPU probe ----------------
+# Keep the Space alive on ZeroGPU until real GPU inference is added.
 try:
     import spaces
     ZERO_GPU_AVAILABLE = True
+    @spaces.GPU()  # trivial, no tensor allocations
+    def gpu_ready_probe() -> str:
+        return "gpu-probe-ok"
 except Exception:
     ZERO_GPU_AVAILABLE = False
+    def gpu_ready_probe() -> str:  # fallback for local/CPU runs
+        return "cpu-only"
+# ---------------- Runner factory (stub) ----------------
 class EchoRunner(ILLMRunner):
     Type = "EchoRunner"
+    async def StartProcess(self, llmServiceObj: dict):  # noqa: N802
+        pass
+    async def RemoveProcess(self, sessionId: str):  # noqa: N802
+        pass
+    async def StopRequest(self, sessionId: str):  # noqa: N802
+        pass
+    async def SendInputAndGetResponse(self, llmServiceObj: dict):  # noqa: N802
+        pass
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
+# ---------------- Publisher and Service ----------------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
+# ---------------- Handlers (.NET FuncName -> service) ----------------
 async def h_start(data):  await service.StartProcess(data or {})
 async def h_user(data):   await service.UserInput(data or {})
 async def h_remove(data): await service.RemoveSession(data or {})
     "getFunctionRegistryFiltered": h_getreg_f,
 }
+# ---------------- Listener wiring ----------------
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
+    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
     handlers=handlers,
 )
     {"ExchangeName": f"llmUserInput{settings.SERVICE_ID}", "FuncName": "llmUserInput",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"llmRemoveSession{settings.SERVICE_ID}", "FuncName": "llmRemoveSession",
+     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"llmStopRequest{settings.SERVICE_ID}", "FuncName": "llmStopRequest",
+     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"queryIndexResult{settings.SERVICE_ID}", "FuncName": "queryIndexResult",
+     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"getFunctionRegistry{settings.SERVICE_ID}", "FuncName": "getFunctionRegistry",
+     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
     {"ExchangeName": f"getFunctionRegistryFiltered{settings.SERVICE_ID}", "FuncName": "getFunctionRegistryFiltered",
+     "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
+# ---------------- Gradio UI (smoke test + GPU probe) ----------------
 async def ping():
     return "ok"
 with gr.Blocks() as demo:
+    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener")
+    with gr.Row():
+        btn = gr.Button("Ping")
+        out = gr.Textbox(label="Ping result")
     btn.click(ping, inputs=None, outputs=out)
+    # Reference the GPU probe so ZeroGPU detection never misses it.
+    if ZERO_GPU_AVAILABLE:
+        probe_btn = gr.Button("GPU Probe")
+        probe_out = gr.Textbox(label="GPU Probe Result")
+        probe_btn.click(lambda: gpu_ready_probe(), None, probe_out)
+# ---------------- FastAPI + lifespan ----------------
 @asynccontextmanager
 async def lifespan(_app: FastAPI):
     # startup
     await service.init()
     await listener.start(DECLS)
     yield
+    # shutdown (optional: close AMQP if you implement it)
+    # await publisher.close()
+    # await listener.stop()
 app = FastAPI(lifespan=lifespan)
 app = gr.mount_gradio_app(app, demo, path="/")
 async def health():
     return {"status": "ok"}
+# Extra: also expose the probe via HTTP (belt & braces for ZeroGPU)
+@app.get("/gpu-probe")
+def gpu_probe_route():
+    return {"status": gpu_ready_probe()}
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

requirements.txt CHANGED Viewed

@@ -4,4 +4,4 @@ uvicorn==0.35.0
 aio-pika==9.5.7
 pydantic==2.11.1
 pydantic-settings==2.10.1

 aio-pika==9.5.7
 pydantic==2.11.1
 pydantic-settings==2.10.1
+spaces>=0.26.3