# Hugging Face Space: Mungert/GradLLM (status: Running) - file size: 5,081 bytes

# app.py (Gradio-only, ZeroGPU-safe)
import asyncio
import logging
from typing import Any, Dict, List
import gradio as gr

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner
from factory import default_runner_factory

# ---------- logging ----------
# One record layout for the whole process: timestamp, level, logger name, message.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"

logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-wide logger used by the startup code below.
log = logging.getLogger("app")

# ---------- @spaces.GPU entrypoint ----------
# Prefer the decorated probe when `spaces` imports and the decorator applies
# cleanly; otherwise bind the same name to a CPU-only stand-in so the UI wiring
# can reference `gpu_entrypoint` unconditionally.
try:
    import spaces

    @spaces.GPU(duration=60)  # minimal GPU endpoint; no tensors allocated
    def gpu_entrypoint() -> str:
        """Return the fixed readiness string from the `@spaces.GPU` entrypoint."""
        return "gpu: ready"
except Exception as exc:
    # Still best-effort (any failure falls back), but record why: a missing
    # package and a decorator error otherwise look identical to the operator.
    # "app" is the same logger the module binds to `log`.
    logging.getLogger("app").info(
        "spaces.GPU unavailable, using CPU-only probe (%s: %s)",
        type(exc).__name__,
        exc,
    )

    def gpu_entrypoint() -> str:
        """Return the fixed string reported when no GPU entrypoint could be set up."""
        return "gpu: not available (CPU only)"
    
# ---------- Publisher & Service ----------
# The publisher is built first because the service takes it as its first argument.
_EXTERNAL_SOURCE = "https://space.external"

publisher = RabbitRepo(external_source=_EXTERNAL_SOURCE)
service = LLMService(publisher, default_runner_factory)

# ---------- Handlers (.NET FuncName -> service) ----------
# Each handler is a thin adapter: a falsy payload is replaced by an empty dict
# before it is handed to the matching `service` coroutine.
async def h_start(data) -> None:
    """Forward the payload to ``service.StartProcess``."""
    payload = data if data else {}
    await service.StartProcess(payload)


async def h_user(data) -> None:
    """Forward the payload to ``service.UserInput``."""
    payload = data if data else {}
    await service.UserInput(payload)


async def h_remove(data) -> None:
    """Forward the payload to ``service.RemoveSession``."""
    payload = data if data else {}
    await service.RemoveSession(payload)


async def h_stop(data) -> None:
    """Forward the payload to ``service.StopRequest``."""
    payload = data if data else {}
    await service.StopRequest(payload)


async def h_qir(data) -> None:
    """Forward the payload to ``service.QueryIndexResult``."""
    payload = data if data else {}
    await service.QueryIndexResult(payload)


async def h_getreg(_) -> None:
    """Call ``service.GetFunctionRegistry(False)``; the payload is ignored."""
    await service.GetFunctionRegistry(False)


async def h_getreg_f(_) -> None:
    """Call ``service.GetFunctionRegistry(True)``; the payload is ignored."""
    await service.GetFunctionRegistry(True)

# Routing table handed to the listener: FuncName -> coroutine that serves it.
handlers = dict(
    llmStartSession=h_start,
    llmUserInput=h_user,
    llmRemoveSession=h_remove,
    llmStopRequest=h_stop,
    queryIndexResult=h_qir,
    getFunctionRegistry=h_getreg,
    getFunctionRegistryFiltered=h_getreg_f,
)

# ---------- Listener wiring ----------
base = RabbitBase()

# queue prefix like your .NET instance
_queue_prefix = settings.RABBIT_INSTANCE_NAME

listener = RabbitListenerBase(base, instance_name=_queue_prefix, handlers=handlers)

# Mirror your C# InitRabbitMQObjs()
# (FuncName, MessageTimeout) for every subscription, in declaration order.
# NOTE(review): timeouts look like milliseconds (600_000 / 60_000) - confirm.
_DECL_SPECS = (
    ("llmStartSession", 600_000),
    ("llmUserInput", 600_000),
    ("llmRemoveSession", 60_000),
    ("llmStopRequest", 60_000),
    ("queryIndexResult", 60_000),
    ("getFunctionRegistry", 60_000),
    ("getFunctionRegistryFiltered", 60_000),
)

# The exchange name is the FuncName suffixed with the service id; every entry
# gets its own single-element routing-key list.
DECLS: List[Dict[str, Any]] = [
    {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for func_name, timeout in _DECL_SPECS
]

# ---------- Gradio callbacks ----------
async def ping() -> str:
    """Return the constant string ``"ok"``; wired to the Ping button."""
    return "ok"

# Start RabbitMQ when the Gradio app loads. Return a short status string.
# The function is registered with `demo.load`, which runs it for every browser
# page load, so the wiring is guarded to complete at most once per process.
_startup_lock = None  # asyncio.Lock, created lazily on the serving event loop
_startup_done = False  # set once publisher, service and listener all started


async def _startup_init() -> str:
    """Connect the publisher, initialise the service and start the listener.

    The three startup steps run only until they have succeeded once; later
    calls return the ready status without touching RabbitMQ again. Concurrent
    calls are serialised so two simultaneous page loads cannot both run the
    startup sequence. A failed attempt is logged and leaves the guard open, so
    the next call retries.

    Returns:
        ``"listener: ready"`` on success (or if startup already completed),
        otherwise ``"listener: ERROR -> <exception>"``.
    """
    global _startup_lock, _startup_done

    # Created here rather than at import time so the lock belongs to the loop
    # that actually serves requests. There is no await between the check and
    # the assignment, so tasks on that loop cannot race on it.
    if _startup_lock is None:
        _startup_lock = asyncio.Lock()

    async with _startup_lock:
        if _startup_done:
            return "listener: ready"
        try:
            await publisher.connect()
            await service.init()
            await listener.start(DECLS)
        except Exception as e:
            # UI boundary: report the failure in the status box instead of raising.
            log.exception("Startup init failed")
            return f"listener: ERROR -> {e}"
        _startup_done = True
        return "listener: ready"

# ---------- Build the actual page ----------
# Two tabs: "Service" (ping button + startup status) and "@spaces.GPU Probe"
# (a button bound to `gpu_entrypoint`). Components are created in display
# order inside their layout contexts, so statement order here is significant.
with gr.Blocks(title="LLM Runner (ZeroGPU-ready)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## LLM Runner — RabbitMQ listener (ZeroGPU-ready)")

    with gr.Tabs():
        with gr.Tab("Service"):
            with gr.Row():
                btn = gr.Button("Ping")
                out = gr.Textbox(label="Ping result")
            # Clicking the button runs `ping` and writes its result into `out`.
            btn.click(ping, inputs=None, outputs=out)

            # show init status when page loads
            init_status = gr.Textbox(label="Startup status", interactive=False)
            # `_startup_init` returns a status string, which lands in the read-only box.
            demo.load(fn=_startup_init, inputs=None, outputs=init_status)

        with gr.Tab("@spaces.GPU Probe"):
            gr.Markdown("This button is a real `@spaces.GPU()` entrypoint so ZeroGPU keeps the Space alive.")
            with gr.Row():
                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
            # IMPORTANT: reference the decorated function DIRECTLY
            # (i.e. pass `gpu_entrypoint` itself, not a lambda or wrapper around it).
            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)

# On HF Spaces, Gradio serves the app automatically if the variable is named `demo`.
if __name__ == "__main__":
    # Local testing only: listen on all interfaces on port 7860, with errors
    # surfaced in the UI and debug mode enabled.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        debug=True,
    )