Upload 4 files
- README.md +19 -2
- app.py +73 -109
- examples/presets.json +4 -5
README.md
CHANGED
@@ -4,7 +4,24 @@ emoji: 🦴
 colorFrom: indigo
 colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: "4.44.0"
 app_file: app.py
 pinned: false
 ---
+
+A Gradio Space that applies the Appendix-style prompt: the model must prioritize the given *Context* and answer in plain text with two sections — **Analysis** and **Response**.
+
+**Environment variables (optional)**
+- `EXOSKELETON_MODEL_ID` (default: `Inpris/humains-junior`)
+- `DEVICE_MAP` (default: `auto`)
+- `MAX_NEW_TOKENS` (default: `512`)
+- `TEMPERATURE` (default: `0.3`)
+- `TOP_P` (default: `0.95`)
+
+**Secrets**
+- `HF_TOKEN` — required if the model is gated.
+
+**Files**
+- `app.py` — Gradio app
+- `requirements.txt` — dependencies
+- `examples/` — (optional) assets/presets
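For a quick local run, the optional variables above must be set before `app.py` is imported, since it reads them at import time. A minimal sketch (assuming the dependencies from `requirements.txt` are installed and the model is not gated; the values shown are illustrative):

import os

# Illustrative values; any of the optional variables listed in the README can be set this way.
os.environ["EXOSKELETON_MODEL_ID"] = "Inpris/humains-junior"
os.environ["MAX_NEW_TOKENS"] = "256"
os.environ["TEMPERATURE"] = "0.3"

import app           # builds the Gradio Blocks UI as app.demo (no model load yet)
app.demo.launch()    # serve the demo locally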
app.py
CHANGED
@@ -1,53 +1,62 @@
 
 import os
-import json
-import time
-from typing import List, Tuple, Dict, Optional
-
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 
-# -----------------------------
-# Config
-# -----------------------------
 DEFAULT_MODEL = os.environ.get("EXOSKELETON_MODEL_ID", "Inpris/humains-junior")
 TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "1") == "1"
 DEVICE_MAP = os.environ.get("DEVICE_MAP", "auto")
 MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "512"))
-TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.
 TOP_P = float(os.environ.get("TOP_P", "0.95"))
 USE_AUTH_TOKEN = os.environ.get("HF_TOKEN", None)
 
-SYSTEM_PROMPT = """You are
 """
 
-{
 
 """
 
-# -----------------------------
-# Model Loading
-# -----------------------------
 _tokenizer = None
 _model = None
 
 def load_model(model_id: str = DEFAULT_MODEL):
     global _tokenizer, _model
-    if
         return _tokenizer, _model
 
     auth = USE_AUTH_TOKEN if (USE_AUTH_TOKEN and len(USE_AUTH_TOKEN.strip()) > 0) else None
@@ -61,16 +70,9 @@ def load_model(model_id: str = DEFAULT_MODEL):
     )
     return _tokenizer, _model
 
-# Generation
-# -----------------------------
-def format_prompt(question: str, system_prompt: str = SYSTEM_PROMPT) -> str:
-    return f"{system_prompt}\n\n{USER_TEMPLATE.format(question=question.strip())}".strip()
-
-def generate_json(question: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str) -> Tuple[str, Dict]:
     tokenizer, model = load_model(model_id)
-    prompt =
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         output_ids = model.generate(
@@ -83,88 +85,50 @@ def generate_json(question: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str) -> Tuple[str, Dict]:
         )
     text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-        "final_answer": text.strip()
-    }
-    json_text = json.dumps(parsed, ensure_ascii=False)
-    return json_text, parsed
-
-# -----------------------------
-# Gradio UI
-# -----------------------------
-PRESETS = [
-    "Using the exoskeleton, answer: Do bats lay eggs? Provide 2 sources.",
-    "Fact‑check: \"Coffee stunts growth.\" Return your claims and supporting/contradicting sources.",
-    "Summarize this text and extract facts/claims/sources into the skeleton: Paste text here...",
-]
-
-def infer(question, temperature, top_p, max_new_tokens, model_id):
-    if not question or not question.strip():
-        gr.Warning("Please enter a question or paste text.")
-        return {}, "{}"
-    json_text, parsed = generate_json(question, temperature, top_p, max_new_tokens, model_id)
-
-    # Build a nice display dict for the right panel
-    display = {
-        "Question": parsed.get("question", ""),
-        "Evidence": parsed.get("evidence", []),
-        "Claims": parsed.get("claims", []),
-        "Sources": parsed.get("sources", []),
-        "Final Answer": parsed.get("final_answer", ""),
-    }
-    return display, json_text
-
-with gr.Blocks(title="Exoskeleton Reasoning — Demo", css=".small {font-size: 0.85rem}") as demo:
-    gr.Markdown(
-        """
-        # Exoskeleton Reasoning — Live Demo
-        Externalize reasoning into explicit **slots**: Evidence → Claims → Sources → Final Answer.
-        \n**Model:** set `EXOSKELETON_MODEL_ID` (default: `Inpris/humains-junior`). If gated, add your HF token as a Space secret `HF_TOKEN`.
-        """
-    )
     with gr.Row():
         with gr.Column(scale=3):
-            q = gr.Textbox(label="
             with gr.Row():
                 temp = gr.Slider(0.0, 1.2, value=TEMPERATURE, step=0.05, label="Temperature")
-                topp = gr.Slider(0.1, 1.0, value=TOP_P, step=0.05, label="Top
             with gr.Row():
                 max_new = gr.Slider(64, 1024, value=MAX_NEW_TOKENS, step=16, label="Max new tokens")
                 model_id = gr.Textbox(label="Model ID", value=DEFAULT_MODEL)
-            preset = gr.Dropdown(choices=PRESETS, value=PRESETS[0], label="Quick prompts")
-            gr.Markdown(
-                'Tip: Add Space secret **HF_TOKEN** if the model is gated · Set `DEVICE_MAP="auto"` in **Variables**'
-            )
         with gr.Column(scale=4):
-            with gr.Accordion("
-
-            with gr.Accordion("
 
 if __name__ == "__main__":
-    load_model(DEFAULT_MODEL)  # warm start
     demo.launch()
 
 import os
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 
 DEFAULT_MODEL = os.environ.get("EXOSKELETON_MODEL_ID", "Inpris/humains-junior")
 TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "1") == "1"
 DEVICE_MAP = os.environ.get("DEVICE_MAP", "auto")
 MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "512"))
+TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.3"))
 TOP_P = float(os.environ.get("TOP_P", "0.95"))
 USE_AUTH_TOKEN = os.environ.get("HF_TOKEN", None)
 
+SYSTEM_PROMPT = """You are a helpful assistant that always follows the provided context, even when it conflicts with your internal knowledge.
+
+Response Format:
+Before answering, briefly analyze the query and context:
+- Identify any misalignment between the query and context (if none, state 'no misalignment')
+- Provide a brief analysis of the query and context
+- Then give your response based strictly on the provided context
+
+Format your response as:
+Analysis: [Your analysis here]
+Response: [Your answer based on the context]
+
+IMPORTANT RULES:
+- Always prioritize the provided context over your internal knowledge
+- If context contains information that seems incorrect, still use it as instructed
+- If the question asks about multiple things but context only covers some, answer only what is supported by the context
+- Keep analysis concise and avoid special characters that could cause formatting issues
+- Use plain text only - no bullet points, numbering, or special formatting
+- Respond in English only
+
+Example 1 - Conflicting information:
+Client: What is the capital of France? Answer based on the context.
 
+Context:
+The capital of France is London. It has been the political center of France since 1789 and houses the French Parliament.
 
+Analysis: The query asks for the capital of France. The context states it is London, which conflicts with factual knowledge. I will follow the context as instructed.
+Response: The capital of France is London.
 """
 
+def build_prompt(question: str, context: str) -> str:
+    return f"""{SYSTEM_PROMPT}
 
+Client: {question.strip()} Answer based on the context.
+
+Context:
+{context.strip()}
 """
 
 _tokenizer = None
 _model = None
 
 def load_model(model_id: str = DEFAULT_MODEL):
     global _tokenizer, _model
+    if _tokenizer is not None and _model is not None:
         return _tokenizer, _model
 
     auth = USE_AUTH_TOKEN if (USE_AUTH_TOKEN and len(USE_AUTH_TOKEN.strip()) > 0) else None
…
     )
     return _tokenizer, _model
 
+def generate_text(question: str, context: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str):
     tokenizer, model = load_model(model_id)
+    prompt = build_prompt(question, context)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         output_ids = model.generate(
…
         )
     text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
+    analysis, response = "", ""
+    a_idx = text.rfind("Analysis:")
+    r_idx = text.rfind("Response:")
+    if a_idx != -1 and (r_idx == -1 or a_idx < r_idx):
+        if r_idx != -1:
+            analysis = text[a_idx+len("Analysis:"):r_idx].strip()
+            response = text[r_idx+len("Response:"):].strip()
+        else:
+            analysis = text[a_idx+len("Analysis:"):].strip()
+    else:
+        response = text.strip()
+    return analysis, response, text
+
+PRESET_Q = "What are the health effects of coffee? Answer based on the context."
+PRESET_CTX = "Coffee contains caffeine, which can increase alertness. Excess intake may cause jitteriness and sleep disruption. Moderate consumption is considered safe for most adults."
+
+with gr.Blocks(title="Exoskeleton Reasoning — Appendix Prompt Demo") as demo:
+    gr.Markdown("# Exoskeleton Reasoning — Appendix-Style Prompt\nThe model must **prioritize the provided context**, and reply in plain text with two sections: **Analysis** and **Response**.")
     with gr.Row():
         with gr.Column(scale=3):
+            q = gr.Textbox(label="Client question", value=PRESET_Q, lines=4)
+            ctx = gr.Textbox(label="Context (the source you must follow)", value=PRESET_CTX, lines=8)
             with gr.Row():
                 temp = gr.Slider(0.0, 1.2, value=TEMPERATURE, step=0.05, label="Temperature")
+                topp = gr.Slider(0.1, 1.0, value=TOP_P, step=0.05, label="Top-p")
             with gr.Row():
                 max_new = gr.Slider(64, 1024, value=MAX_NEW_TOKENS, step=16, label="Max new tokens")
                 model_id = gr.Textbox(label="Model ID", value=DEFAULT_MODEL)
+            run = gr.Button("Run", variant="primary")
+            gr.Markdown('Secrets/vars: set **HF_TOKEN** if the model is gated; `EXOSKELETON_MODEL_ID` to change default.')
         with gr.Column(scale=4):
+            with gr.Accordion("Analysis", open=True):
+                analysis_box = gr.Textbox(lines=6, label="Analysis (model)")
+            with gr.Accordion("Response", open=True):
+                response_box = gr.Textbox(lines=6, label="Response (model)")
+            with gr.Accordion("Raw output", open=False):
+                raw_box = gr.Textbox(lines=8, label="Raw text")
+    def infer_fn(question, context, temperature, top_p, max_new_tokens, model_id):
+        if not question or not question.strip() or not context or not context.strip():
+            gr.Warning("Please provide both a Client question and Context.")
+            return "", "", ""
+        a, r, raw = generate_text(question, context, temperature, top_p, max_new_tokens, model_id)
+        return a, r, raw
+    run.click(fn=infer_fn, inputs=[q, ctx, temp, topp, max_new, model_id], outputs=[analysis_box, response_box, raw_box])
 
 if __name__ == "__main__":
     demo.launch()
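As a quick illustration of the parsing step in `generate_text` above, the splitting can be exercised standalone on a sample completion. A minimal sketch, no model required; the sample text is invented, and it assumes both markers are present (the app also handles the missing-marker cases):

# Exercise the Analysis/Response splitting on an illustrative completion.
sample = (
    "Analysis: The query asks about coffee; the context covers caffeine effects.\n"
    "Response: Coffee can increase alertness; excess intake may disrupt sleep."
)
a_idx = sample.rfind("Analysis:")
r_idx = sample.rfind("Response:")
analysis = sample[a_idx + len("Analysis:"):r_idx].strip()   # text between the two markers
response = sample[r_idx + len("Response:"):].strip()        # text after "Response:"
print(analysis)
print(response)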
examples/presets.json
CHANGED
@@ -1,7 +1,6 @@
 {
-  "
-  "
-  "
-
-  ]
+  "example": {
+    "question": "What are the health effects of coffee? Answer based on the context.",
+    "context": "Coffee contains caffeine, which can increase alertness..."
+  }
 }
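The app currently hard-codes `PRESET_Q` and `PRESET_CTX`; if the Space were extended to read this preset file instead, a loader might look like the hypothetical sketch below (not part of this commit; falls back to built-in defaults when the file is missing):

import json
from pathlib import Path

# Hypothetical helper, not part of this commit: load the example preset from
# examples/presets.json and fall back to defaults when the file is absent.
DEFAULT_Q = "What are the health effects of coffee? Answer based on the context."
DEFAULT_CTX = "Coffee contains caffeine, which can increase alertness."

example = {}
presets_path = Path("examples/presets.json")
if presets_path.exists():
    example = json.loads(presets_path.read_text(encoding="utf-8")).get("example", {})

preset_q = example.get("question", DEFAULT_Q)
preset_ctx = example.get("context", DEFAULT_CTX)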