File size: 5,081 Bytes
b843648
bf292d9
b843648
 
bf292d9
2001be3
b2c2f23
527d73d
b2c2f23
 
 
 
8d27c84
bf292d9
b843648
32b704b
 
 
 
b843648
 
 
2001be3
 
 
b843648
 
 
2001be3
b843648
2001be3
8d27c84
b843648
527d73d
8d27c84
bf292d9
b843648
527d73d
 
bf292d9
527d73d
 
 
 
bf292d9
 
 
 
 
 
 
 
 
 
 
b843648
527d73d
 
 
2001be3
527d73d
 
bf292d9
b843648
 
527d73d
 
 
 
 
8280e1d
527d73d
8280e1d
527d73d
8280e1d
527d73d
8280e1d
527d73d
8280e1d
bf292d9
 
b843648
 
bf292d9
 
b843648
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf292d9
b843648
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# app.py (Gradio-only, ZeroGPU-safe)
import asyncio
import logging
from typing import Any, Dict, List
import gradio as gr

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner
from factory import default_runner_factory

# ---------- logging ----------
# Configure the root logger once at import time: INFO threshold and a
# "<timestamp> [<LEVEL>] <logger name>: <message>" line layout.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)
# Logger used by the rest of this module.
log = logging.getLogger("app")

# ---------- @spaces.GPU entrypoint ----------
try:
    import spaces

    @spaces.GPU(duration=60)  # minimal GPU endpoint; no tensors allocated
    def gpu_entrypoint() -> str:
        """Return a fixed readiness string from a ``@spaces.GPU`` entrypoint."""
        return "gpu: ready"
except Exception as exc:
    # Best-effort fallback: the module must stay importable when `spaces` is
    # missing or the decorator cannot be applied. Record the reason so the
    # downgrade to the CPU-only stub is visible in the logs instead of silent.
    logging.getLogger("app").warning(
        "spaces.GPU unavailable (%r); using CPU-only gpu_entrypoint", exc
    )

    def gpu_entrypoint() -> str:
        """Return the CPU-only fallback status string."""
        return "gpu: not available (CPU only)"
    
# ---------- Publisher & Service ----------
# Repo object created with the external source URL; it is handed to the
# service below and connected later during startup.
publisher = RabbitRepo(
    external_source="https://space.external",
)
# Service instance built from the publisher and the default runner factory.
service = LLMService(
    publisher,
    default_runner_factory,
)

# ---------- Handlers (.NET FuncName -> service) ----------
async def h_start(data: Any) -> None:
    """Pass an ``llmStartSession`` payload to ``service.StartProcess``."""
    payload = data or {}  # a falsy payload (e.g. None) becomes an empty dict
    await service.StartProcess(payload)


async def h_user(data: Any) -> None:
    """Pass an ``llmUserInput`` payload to ``service.UserInput``."""
    payload = data or {}
    await service.UserInput(payload)


async def h_remove(data: Any) -> None:
    """Pass an ``llmRemoveSession`` payload to ``service.RemoveSession``."""
    payload = data or {}
    await service.RemoveSession(payload)


async def h_stop(data: Any) -> None:
    """Pass an ``llmStopRequest`` payload to ``service.StopRequest``."""
    payload = data or {}
    await service.StopRequest(payload)


async def h_qir(data: Any) -> None:
    """Pass a ``queryIndexResult`` payload to ``service.QueryIndexResult``."""
    payload = data or {}
    await service.QueryIndexResult(payload)


async def h_getreg(_: Any) -> None:
    """Call ``service.GetFunctionRegistry(False)``; the payload is ignored."""
    await service.GetFunctionRegistry(False)


async def h_getreg_f(_: Any) -> None:
    """Call ``service.GetFunctionRegistry(True)``; the payload is ignored."""
    await service.GetFunctionRegistry(True)


# Lookup table: function name carried by a message -> coroutine handling it.
handlers: Dict[str, Any] = {
    "llmStartSession": h_start,
    "llmUserInput": h_user,
    "llmRemoveSession": h_remove,
    "llmStopRequest": h_stop,
    "queryIndexResult": h_qir,
    "getFunctionRegistry": h_getreg,
    "getFunctionRegistryFiltered": h_getreg_f,
}

# ---------- Listener wiring ----------
# Base object the listener is constructed on.
base = RabbitBase()
# Listener wired with the handler table; instance_name is the queue prefix
# (like your .NET instance).
listener = RabbitListenerBase(
    base,
    handlers=handlers,
    instance_name=settings.RABBIT_INSTANCE_NAME,
)

# Mirror your C# InitRabbitMQObjs()
# One declaration per handled function. The exchange name is the function
# name suffixed with the service id, and every entry uses the single
# configured routing key.
# NOTE(review): MessageTimeout values are presumably milliseconds — confirm
# against the listener implementation.
DECLS: List[Dict[str, Any]] = [
    {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for func_name, timeout in (
        ("llmStartSession", 600_000),
        ("llmUserInput", 600_000),
        ("llmRemoveSession", 60_000),
        ("llmStopRequest", 60_000),
        ("queryIndexResult", 60_000),
        ("getFunctionRegistry", 60_000),
        ("getFunctionRegistryFiltered", 60_000),
    )
]

# ---------- Gradio callbacks ----------
async def ping() -> str:
    """Return the constant string ``"ok"`` for the Ping button."""
    return "ok"

# Start RabbitMQ when the Gradio app loads. Return a short status string.
# The page-load event fires for every browser visit, so the wiring below is
# guarded to complete at most once per process.
_startup_lock = None   # asyncio.Lock, created lazily inside the running loop
_startup_done = False  # True once connect/init/start have all succeeded


async def _startup_init() -> str:
    """Connect the publisher, initialise the service and start the listener.

    Safe to call repeatedly and concurrently: calls are serialised by a lock,
    and once a call has succeeded later calls return immediately without
    touching RabbitMQ again. A failed attempt is not recorded as done, so the
    next call retries.

    Returns:
        ``"listener: ready"`` on success (or if already started), otherwise
        ``"listener: ERROR -> <exception>"``. Exceptions derived from
        ``Exception`` are logged and reported in the string, not raised.
    """
    global _startup_lock, _startup_done
    if _startup_lock is None:
        # There is no await between the check and the assignment, so two
        # callers on the same event loop cannot both create a lock.
        _startup_lock = asyncio.Lock()
    async with _startup_lock:
        if _startup_done:
            return "listener: ready"
        try:
            await publisher.connect()
            await service.init()
            await listener.start(DECLS)
        except Exception as e:
            log.exception("Startup init failed")
            return f"listener: ERROR -> {e}"
        _startup_done = True
        return "listener: ready"

# ---------- Build the actual page ----------
# Page layout: a heading plus two tabs. Components are created inside nested
# context managers, so their creation order here is their order on the page.
with gr.Blocks(title="LLM Runner (ZeroGPU-ready)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## LLM Runner — RabbitMQ listener (ZeroGPU-ready)")

    with gr.Tabs():
        # Tab 1: manual ping plus the startup status of the listener.
        with gr.Tab("Service"):
            with gr.Row():
                btn = gr.Button("Ping")
                out = gr.Textbox(label="Ping result")
            # Clicking the button writes ping()'s return value into the textbox.
            btn.click(ping, inputs=None, outputs=out)

            # Show init status when the page loads: the load event calls
            # _startup_init and displays the status string it returns.
            init_status = gr.Textbox(label="Startup status", interactive=False)
            demo.load(fn=_startup_init, inputs=None, outputs=init_status)

        # Tab 2: button bound to the gpu_entrypoint function.
        with gr.Tab("@spaces.GPU Probe"):
            gr.Markdown("This button is a real `@spaces.GPU()` entrypoint so ZeroGPU keeps the Space alive.")
            with gr.Row():
                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
            # IMPORTANT: reference the decorated function DIRECTLY
            # (pass gpu_entrypoint itself, not a wrapper around it).
            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)

# On HF Spaces, Gradio serves the app automatically if the variable is named `demo`.
if __name__ == "__main__":
    # Direct execution is for local testing only: listen on all interfaces at
    # port 7860 with error display and debug mode enabled.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        debug=True,
    )