File size: 5,081 Bytes
b843648 bf292d9 b843648 bf292d9 2001be3 b2c2f23 527d73d b2c2f23 8d27c84 bf292d9 b843648 32b704b b843648 2001be3 b843648 2001be3 b843648 2001be3 8d27c84 b843648 527d73d 8d27c84 bf292d9 b843648 527d73d bf292d9 527d73d bf292d9 b843648 527d73d 2001be3 527d73d bf292d9 b843648 527d73d 8280e1d 527d73d 8280e1d 527d73d 8280e1d 527d73d 8280e1d 527d73d 8280e1d bf292d9 b843648 bf292d9 b843648 bf292d9 b843648 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# app.py (Gradio-only, ZeroGPU-safe)
import asyncio
import logging
from typing import Any, Dict, List
import gradio as gr
from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner
from factory import default_runner_factory
# ---------- logging ----------
# Root logging configuration: records at INFO and above, rendered as
# "<time> [<level>] <logger name>: <message>".
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Logger used by the rest of this module.
log = logging.getLogger("app")
# ---------- @spaces.GPU entrypoint ----------
try:
    import spaces

    @spaces.GPU(duration=60)  # minimal GPU endpoint; no tensors allocated
    def gpu_entrypoint() -> str:
        """Return a fixed status string from the ``@spaces.GPU`` entrypoint."""
        return "gpu: ready"

except Exception as exc:
    # Deliberate best-effort: any failure while importing ``spaces`` or
    # applying the decorator falls back to a plain function. Record the reason
    # so the fallback is visible in the logs instead of happening silently.
    # ``logging.getLogger("app")`` is the same logger object as ``log``.
    logging.getLogger("app").info(
        "spaces.GPU unavailable (%r); using CPU-only probe", exc
    )

    def gpu_entrypoint() -> str:
        """Return a fixed status string saying the GPU entrypoint is absent."""
        return "gpu: not available (CPU only)"
# ---------- Publisher & Service ----------
# The publisher is built first because the service is constructed around it.
publisher = RabbitRepo(
    external_source="https://space.external",
)
# The service receives the publisher and the default runner factory.
service = LLMService(
    publisher,
    default_runner_factory,
)
# ---------- Handlers (.NET FuncName -> service) ----------
async def h_start(data):
    """Pass ``data`` (or an empty dict when it is falsy) to ``service.StartProcess``."""
    payload = data if data else {}
    await service.StartProcess(payload)
async def h_user(data):
    """Pass ``data`` (or an empty dict when it is falsy) to ``service.UserInput``."""
    payload = data if data else {}
    await service.UserInput(payload)
async def h_remove(data):
    """Pass ``data`` (or an empty dict when it is falsy) to ``service.RemoveSession``."""
    payload = data if data else {}
    await service.RemoveSession(payload)
async def h_stop(data):
    """Pass ``data`` (or an empty dict when it is falsy) to ``service.StopRequest``."""
    payload = data if data else {}
    await service.StopRequest(payload)
async def h_qir(data):
    """Pass ``data`` (or an empty dict when it is falsy) to ``service.QueryIndexResult``."""
    payload = data if data else {}
    await service.QueryIndexResult(payload)
async def h_getreg(_):
    """Call ``service.GetFunctionRegistry(False)``; the incoming message is ignored."""
    filtered = False
    await service.GetFunctionRegistry(filtered)
async def h_getreg_f(_):
    """Call ``service.GetFunctionRegistry(True)``; the incoming message is ignored."""
    filtered = True
    await service.GetFunctionRegistry(filtered)
# Dispatch table: function name carried by a message -> coroutine handling it.
handlers = dict(
    llmStartSession=h_start,
    llmUserInput=h_user,
    llmRemoveSession=h_remove,
    llmStopRequest=h_stop,
    queryIndexResult=h_qir,
    getFunctionRegistry=h_getreg,
    getFunctionRegistryFiltered=h_getreg_f,
)
# ---------- Listener wiring ----------
# Base object handed to the listener as its first argument.
base = RabbitBase()

# instance_name: queue prefix like your .NET instance.
# handlers: the dispatch table defined above.
listener = RabbitListenerBase(
    base,
    handlers=handlers,
    instance_name=settings.RABBIT_INSTANCE_NAME,
)
# Mirror your C# InitRabbitMQObjs()
# Each spec is (FuncName, MessageTimeout). Every declaration uses the function
# name suffixed with settings.SERVICE_ID as its exchange name and a single
# routing key taken from settings.RABBIT_ROUTING_KEY.
_DECL_SPECS = (
    ("llmStartSession", 600_000),
    ("llmUserInput", 600_000),
    ("llmRemoveSession", 60_000),
    ("llmStopRequest", 60_000),
    ("queryIndexResult", 60_000),
    ("getFunctionRegistry", 60_000),
    ("getFunctionRegistryFiltered", 60_000),
)

DECLS: List[Dict[str, Any]] = [
    {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for func_name, timeout in _DECL_SPECS
]
# ---------- Gradio callbacks ----------
async def ping() -> str:
    """Return the constant string ``"ok"`` for the Ping button."""
    reply = "ok"
    return reply
# Start RabbitMQ when the Gradio app loads. Return a short status string.
# This coroutine is registered with ``demo.load``, which runs for every browser
# page load, so the state below makes a successful start-up happen only once.
_startup_lock = None  # asyncio.Lock, created lazily inside the running loop
_startup_done = False  # True once connect/init/start have all succeeded


async def _startup_init():
    """Connect the publisher, initialise the service and start the listener.

    The three start-up steps run at most once successfully per process:
    concurrent or repeated calls are serialised by a lock, and once a run has
    succeeded later calls return the ready status without redoing the work.
    A failed run is logged and leaves the flag unset, so the next call
    retries.

    Returns:
        ``"listener: ready"`` on success (or when already started), otherwise
        ``"listener: ERROR -> <exception>"``.
    """
    global _startup_lock, _startup_done

    if _startup_lock is None:
        # No await between the check and the assignment, so only one lock is
        # ever created on the event loop.
        _startup_lock = asyncio.Lock()

    async with _startup_lock:
        if _startup_done:
            return "listener: ready"
        try:
            await publisher.connect()
            await service.init()
            await listener.start(DECLS)
        except Exception as e:
            log.exception("Startup init failed")
            return f"listener: ERROR -> {e}"
        _startup_done = True
        return "listener: ready"
# ---------- Build the actual page ----------
# NOTE(review): the nesting below is reconstructed from the context managers;
# confirm which components belong inside each gr.Row().
with gr.Blocks(
    title="LLM Runner (ZeroGPU-ready)",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown("## LLM Runner — RabbitMQ listener (ZeroGPU-ready)")

    with gr.Tabs():
        # Tab 1: manual ping plus the start-up status filled in on page load.
        with gr.Tab("Service"):
            with gr.Row():
                btn = gr.Button("Ping")
                out = gr.Textbox(label="Ping result")
            btn.click(fn=ping, inputs=None, outputs=out)

            # show init status when page loads
            init_status = gr.Textbox(label="Startup status", interactive=False)
            demo.load(_startup_init, inputs=None, outputs=init_status)

        # Tab 2: button bound to the @spaces.GPU entrypoint.
        with gr.Tab("@spaces.GPU Probe"):
            gr.Markdown("This button is a real `@spaces.GPU()` entrypoint so ZeroGPU keeps the Space alive.")
            with gr.Row():
                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
            # IMPORTANT: reference the decorated function DIRECTLY
            gpu_btn.click(fn=gpu_entrypoint, inputs=None, outputs=gpu_out)
# On HF Spaces, Gradio serves the app automatically if the variable is named `demo`.
if __name__ == "__main__":
    # Local testing only.
    _launch_kwargs = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "debug": True,
    }
    demo.launch(**_launch_kwargs)
|