Commit 8d27c84
1 Parent(s): ee97085
added test llm runner

Files changed:
- app.py +3 -13
- factory.py +6 -0
- function_tracker.py +42 -0
- message_helper.py +5 -0
- models.py +34 -0
- runners/echo.py +46 -0
- service.py +10 -16
- streaming.py +20 -0
app.py CHANGED

@@ -10,6 +10,7 @@ from listener import RabbitListenerBase
 from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
+from factory import default_runner_factory

 # ---------- logging ----------
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
@@ -25,21 +26,10 @@ try:
 except Exception:
     def gpu_entrypoint() -> str:
         return "gpu: not available (CPU only)"
-
-# ---------- Runner factory (stub) ----------
-class EchoRunner(ILLMRunner):
-    Type = "EchoRunner"
-    async def StartProcess(self, llmServiceObj: dict): pass
-    async def RemoveProcess(self, sessionId: str): pass
-    async def StopRequest(self, sessionId: str): pass
-    async def SendInputAndGetResponse(self, llmServiceObj: dict): pass
-
-async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
-    return EchoRunner()
-
+
 # ---------- Publisher & Service ----------
 publisher = RabbitRepo(external_source="https://space.external")
-service = LLMService(publisher, runner_factory)
+service = LLMService(publisher, default_runner_factory)

 # ---------- Handlers (.NET FuncName -> service) ----------
 async def h_start(data): await service.StartProcess(data or {})
factory.py ADDED

@@ -0,0 +1,6 @@
+# factories.py
+from runners.echo import EchoRunner
+
+async def default_runner_factory(context: Dict[str, Any]) -> ILLMRunner:
+    # choose runner by context["LLMRunnerType"] if you need variants
+    return EchoRunner(publisher=context["_publisher"], settings=context["_settings"])
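As committed, factory.py annotates default_runner_factory with Dict, Any, and ILLMRunner but never imports them, so importing the module would fail with a NameError when the annotations are evaluated. A minimal sketch of the completed module, assuming those names are meant to come from typing and runners.base:

# factory.py (sketch: adds the imports the annotations appear to need)
from typing import Any, Dict

from runners.base import ILLMRunner
from runners.echo import EchoRunner

async def default_runner_factory(context: Dict[str, Any]) -> ILLMRunner:
    # Choose a runner by context["LLMRunnerType"] if variants are ever needed;
    # for now every session gets the EchoRunner test runner.
    return EchoRunner(publisher=context["_publisher"], settings=context["_settings"])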
function_tracker.py ADDED

@@ -0,0 +1,42 @@
+# function_tracker.py
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Dict, List
+import random
+
+@dataclass
+class TrackedCall:
+    FunctionCallId: str
+    FunctionName: str
+    IsProcessed: bool = False
+    Payload: str = ""
+
+class FunctionCallTracker:
+    def __init__(self) -> None:
+        self._by_msg: Dict[str, Dict[str, TrackedCall]] = {}
+
+    @staticmethod
+    def gen_id() -> str:
+        return f"call_{random.randint(10_000_000, 99_999_999)}"
+
+    def add(self, message_id: str, fn_name: str, payload: str) -> str:
+        call_id = self.gen_id()
+        self._by_msg.setdefault(message_id, {})[call_id] = TrackedCall(call_id, fn_name, False, payload)
+        return call_id
+
+    def mark_processed(self, message_id: str, call_id: str, payload: str = "") -> None:
+        m = self._by_msg.get(message_id, {})
+        if call_id in m:
+            m[call_id].IsProcessed = True
+            if payload:
+                m[call_id].Payload = payload
+
+    def all_processed(self, message_id: str) -> bool:
+        m = self._by_msg.get(message_id, {})
+        return bool(m) and all(x.IsProcessed for x in m.values())
+
+    def processed_list(self, message_id: str) -> List[TrackedCall]:
+        return list(self._by_msg.get(message_id, {}).values())
+
+    def clear(self, message_id: str) -> None:
+        self._by_msg.pop(message_id, None)
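FunctionCallTracker keeps one dictionary of TrackedCall entries per message ID, so all_processed answers "is every call recorded for this message done?". A quick sketch of the lifecycle; the message ID, function name, and payloads are invented for illustration:

# Hypothetical usage of FunctionCallTracker (IDs and payloads invented)
tracker = FunctionCallTracker()
call_id = tracker.add("msg-1", "run_query", payload='{"q": "disk usage"}')
print(tracker.all_processed("msg-1"))        # False: the call is still pending
tracker.mark_processed("msg-1", call_id, payload='{"rows": 3}')
print(tracker.all_processed("msg-1"))        # True: every tracked call for msg-1 is done
tracker.clear("msg-1")                       # drop the per-message bookkeeping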
message_helper.py ADDED

@@ -0,0 +1,5 @@
+# message_helper.py
+def success(msg: str) -> str: return f"</llm-success>{msg}"
+def error(msg: str) -> str: return f"</llm-error>{msg}"
+def warning(msg: str) -> str: return f"</llm-warning>{msg}"
+def info(msg: str) -> str: return f"</llm-info>{msg}"
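These helpers only prepend the UI's status marker to a message; service.py below imports success and error as _ok and _err. A trivial illustration:

# Illustration only
from message_helper import success, error
print(success("session started"))   # </llm-success>session started
print(error("runner not found"))    # </llm-error>runner not found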
models.py CHANGED

@@ -127,6 +127,40 @@ class LLMServiceObj(BaseModel):
     IsFunctionCallError: bool = False
     IsFunctionCallStatus: bool = False
     IsFunctionStillRunning: bool = False
+
+    def set_as_call(self) -> "LLMServiceObj":
+        self.IsFunctionCall = True
+        self.IsFunctionCallResponse = False
+        self.IsFunctionCallError = False
+        self.IsFunctionCallStatus = False
+        self.IsFunctionStillRunning = False
+        return self
+
+    def set_as_call_error(self) -> "LLMServiceObj":
+        self.IsFunctionCall = True
+        self.IsFunctionCallResponse = False
+        self.IsFunctionCallError = True
+        self.IsFunctionCallStatus = False
+        self.IsFunctionStillRunning = False
+        return self
+
+    def set_as_response_complete(self) -> "LLMServiceObj":
+        self.IsFunctionCall = False
+        self.IsFunctionCallResponse = True
+        self.IsFunctionCallError = False
+        self.IsFunctionCallStatus = False
+        self.IsFunctionStillRunning = False
+        self.IsProcessed = True
+        return self
+
+    def set_as_not_call(self) -> "LLMServiceObj":
+        self.IsFunctionCall = False
+        self.IsFunctionCallResponse = False
+        self.IsFunctionCallError = False
+        self.IsFunctionCallStatus = False
+        self.IsFunctionStillRunning = False
+        return self
+


 class ResultObj(BaseModel):
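The new setters return self, so callers can flip the whole flag cluster in one call and keep chaining. A hedged illustration; the constructor call mirrors how runners/echo.py builds LLMServiceObj and assumes the remaining fields have defaults:

# Illustration only: toggling the function-call flags on an LLMServiceObj
llm = LLMServiceObj(LlmMessage="")
llm.set_as_call()               # IsFunctionCall=True, all other flags False
llm.set_as_response_complete()  # IsFunctionCallResponse=True and IsProcessed=True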
runners/echo.py ADDED

@@ -0,0 +1,46 @@
+# runners/echo.py
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from .base import ILLMRunner
+from models import LLMServiceObj
+from function_tracker import FunctionCallTracker
+import logging
+
+class EchoRunner(ILLMRunner):
+    Type = "TurboLLM"
+    IsEnabled = True
+    IsStateStarting = False
+    IsStateFailed = False
+
+    def __init__(self, publisher, settings):
+        self._pub = publisher
+        self._settings = settings
+        self._tracker = FunctionCallTracker()
+        self._log = logging.getLogger("EchoRunner")
+
+    async def StartProcess(self, llmServiceObj: dict) -> None:
+        self._log.debug(f"StartProcess called with: {llmServiceObj}")
+        # pretend to “warm up”
+        pass
+
+    async def RemoveProcess(self, sessionId: str) -> None:
+        self._log.debug(f"RemoveProcess called for session: {sessionId}")
+        # nothing to clean here
+        pass
+
+    async def StopRequest(self, sessionId: str) -> None:
+        self._log.debug(f"StopRequest called for session: {sessionId}")
+        # no streaming loop to stop in echo
+        pass
+
+    async def SendInputAndGetResponse(self, llmServiceObj: dict) -> None:
+        self._log.debug(f"SendInputAndGetResponse called with: {llmServiceObj}")
+        llm = LLMServiceObj(**llmServiceObj)
+        if llm.UserInput.startswith("<|START_AUDIO|>") or llm.UserInput.startswith("<|STOP_AUDIO|>"):
+            self._log.debug("Audio input detected, ignoring in echo.")
+            return
+
+        # Echo behavior (match UI format)
+        await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage=f"<User:> {llm.UserInput}\n\n"))
+        await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage=f"<Assistant:> You said: {llm.UserInput}\n"))
+        await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage="<end-of-line>"))
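Because EchoRunner only needs an object with an async publish(exchange, obj) method, it can be exercised without RabbitMQ. A minimal smoke-test sketch; FakePublisher, the empty settings dict, and the input payload are invented here and assume UserInput alone is enough to construct an LLMServiceObj:

# Hypothetical smoke test for EchoRunner (not part of the commit)
import asyncio
from runners.echo import EchoRunner

class FakePublisher:
    async def publish(self, exchange, obj):
        # Print what would normally go to the llmServiceMessage exchange
        print(exchange, getattr(obj, "LlmMessage", obj))

async def main():
    runner = EchoRunner(publisher=FakePublisher(), settings={})
    await runner.StartProcess({})
    await runner.SendInputAndGetResponse({"UserInput": "hello"})

asyncio.run(main())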
service.py CHANGED

@@ -7,7 +7,7 @@ from config import settings
 from models import LLMServiceObj, ResultObj
 from rabbit_repo import RabbitRepo
 from runners.base import ILLMRunner
-
+from message_helper import success as _ok, error as _err

 @dataclass
 class _Session:
@@ -65,7 +65,7 @@ class LLMService:
         llm.ResultMessage = message
         llm.ResultSuccess = success
         if include_llm_message:
-            llm.LlmMessage =
+            llm.LlmMessage = _ok(message) if success else _err(message)

         if check_system and llm.IsSystemLlm:
             return
@@ -164,6 +164,7 @@ class LLMService:
             try:
                 await s.Runner.RemoveProcess(sid)
                 s.Runner = None
+                self._sessions.pop(sid, None)  # ← free the entry
                 msgs.append(sid)
             except Exception as e:
                 ok = False
@@ -217,29 +218,22 @@ class LLMService:
         await r.SendInputAndGetResponse(llm.model_dump(by_alias=True))

     async def QueryIndexResult(self, payload: Any) -> None:
-        """
-        Your .NET listener concatenates RAG outputs, sets ResultObj, and notifies the coordinator.
-        Here, we forward a service message containing the same info so the UI can reflect completion.
-        'payload' usually has: Success, Message, QueryResults: [{Output: "..."}]
-        """
         try:
             data = payload if isinstance(payload, dict) else {}
             outputs = data.get("QueryResults") or []
             rag_data = "\n".join([x.get("Output", "") for x in outputs if isinstance(x, dict)])

+            # NEW: show RAG to the chat like tool output
+            await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage=f"<Function Response:> {rag_data}\n\n"))
+            await self._pub.publish("llmServiceMessage", LLMServiceObj(LlmMessage="</functioncall-complete>"))
+
+            # keep your existing summary object (nice for observers/metrics)
             await self._pub.publish(
                 "llmServiceMessage",
-                ResultObj(
-                    Message=data.get("Message", ""),
-                    Success=bool(data.get("Success", False)),
-                    Data=rag_data,
-                ),
+                ResultObj(Message=data.get("Message", ""), Success=bool(data.get("Success", False)), Data=rag_data),
             )
         except Exception as e:
-            await self._pub.publish(
-                "llmServiceMessage",
-                ResultObj(Message=str(e), Success=False),
-            )
+            await self._pub.publish("llmServiceMessage", ResultObj(Message=str(e), Success=False))

     async def GetFunctionRegistry(self, filtered: bool = False) -> None:
         """
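With this change a successful QueryIndexResult produces three publishes on llmServiceMessage: the RAG text as a chat-style function response, a </functioncall-complete> marker, and the existing ResultObj summary. A sketch of the payload shape it consumes, with invented values:

# Illustration only: the payload QueryIndexResult handles, per the diff above
payload = {
    "Success": True,
    "Message": "query complete",
    "QueryResults": [{"Output": "host-a: 71% disk"}, {"Output": "host-b: 40% disk"}],
}
# rag_data becomes "host-a: 71% disk\nhost-b: 40% disk" before the three publishes run.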
streaming.py ADDED

@@ -0,0 +1,20 @@
+# streaming.py
+import asyncio
+
+async def stream_in_chunks(publish, exchange: str, llm_obj_builder, text: str,
+                           batch_size: int = 3, max_chars: int = 100,
+                           base_delay_ms: int = 30, per_char_ms: int = 2) -> None:
+    seps = set(" ,!?{}.:;\n")
+    buf, parts, count = [], [], 0
+    for ch in text:
+        parts.append(ch)
+        if ch in seps:
+            buf.append("".join(parts)); parts.clear(); count += 1
+            if count >= batch_size or sum(len(x) for x in buf) >= max_chars:
+                o = llm_obj_builder("".join(buf))
+                await publish(exchange, o)
+                await asyncio.sleep((base_delay_ms + per_char_ms * sum(len(x) for x in buf))/1000)
+                buf.clear(); count = 0
+    if parts: buf.append("".join(parts))
+    if buf:
+        await publish(exchange, llm_obj_builder("".join(buf)))
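stream_in_chunks buffers separator-terminated pieces and flushes whenever batch_size pieces or max_chars characters accumulate, sleeping briefly per flush to simulate token streaming. A small driver sketch; the fake publish coroutine and the dict builder stand in for RabbitRepo.publish and an LLMServiceObj factory and are not part of the commit:

# Hypothetical driver for stream_in_chunks
import asyncio
from streaming import stream_in_chunks

async def fake_publish(exchange, obj):
    print(f"[{exchange}] {obj}")

asyncio.run(stream_in_chunks(
    fake_publish,
    "llmServiceMessage",
    lambda chunk: {"LlmMessage": chunk},   # stand-in for building an LLMServiceObj
    "Streaming text is flushed in word batches, with a small delay per batch.",
    batch_size=3,
))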