NS-Y committed on
Commit
7d02d76
·
verified ·
1 Parent(s): 5ddd643

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +23 -14
  2. app.py +170 -0
  3. examples/presets.json +7 -0
  4. requirements.txt +5 -0
README.md CHANGED
@@ -1,14 +1,23 @@
1
- ---
2
- title: Humains Junior
3
- emoji: 🦀
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 5.49.1
8
- app_file: app.py
9
- pinned: false
10
- license: cc-by-nc-sa-4.0
11
- short_description: A small model for factual grounding
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
1
+ # Exoskeleton Reasoning — Hugging Face Space
2
+
3
+ A zero-setup Gradio demo for **Exoskeleton Reasoning**. It asks the model to return a compact JSON with slots:
4
+ `question, evidence, claims, sources, final_answer` and renders a structured panel + the raw JSON.
5
+
6
+ ## Try locally
7
+ ```bash
8
+ pip install -r requirements.txt
9
+ export EXOSKELETON_MODEL_ID=Inpris/humains-junior # or another compatible instruct model
10
+ export HF_TOKEN=hf_xxx # if the model is gated
11
+ python app.py
12
+ ```
13
+
14
+ ## Space secrets / variables
15
+ - **HF_TOKEN**: (Secret) your access token if the model is gated.
16
+ - **EXOSKELETON_MODEL_ID**: (Variable) defaults to `Inpris/humains-junior`.
17
+ - **DEVICE_MAP**: (Variable) defaults to `auto`. Set to `cuda` on GPU Spaces.
18
+ - **MAX_NEW_TOKENS**: (Variable) default 512.
19
+ - **TEMPERATURE**, **TOP_P**: sampling controls.
20
+
21
+ ## Notes
22
+ - On free CPU Spaces, first token latency can be high. Consider enabling GPU or using a quantized checkpoint.
23
+ - If JSON parsing fails (some models may add prose), the app falls back to showing the raw text as the final answer.
app.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import json
4
+ import time
5
+ from typing import List, Tuple, Dict, Optional
6
+
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
9
+ import gradio as gr
10
+
11
# -----------------------------
# Config
# -----------------------------
# Every runtime knob comes from the environment so Space operators can tune
# the demo (model id, device placement, sampling) without touching the code.
_env = os.environ.get

DEFAULT_MODEL = _env("EXOSKELETON_MODEL_ID", "Inpris/humains-junior")
TRUST_REMOTE_CODE = _env("TRUST_REMOTE_CODE", "1") == "1"  # opt out with "0"
DEVICE_MAP = _env("DEVICE_MAP", "auto")
MAX_NEW_TOKENS = int(_env("MAX_NEW_TOKENS", "512"))
TEMPERATURE = float(_env("TEMPERATURE", "0.4"))
TOP_P = float(_env("TOP_P", "0.95"))
USE_AUTH_TOKEN = _env("HF_TOKEN", None)  # only needed for gated checkpoints
21
+
22
# Prompt scaffolding. SYSTEM_PROMPT pins the JSON schema the model must emit
# (keys: question, evidence, claims, sources, final_answer) and includes a
# minified example; USER_TEMPLATE appends the user's question and restates
# the required keys. Both are runtime strings — their exact wording is part
# of the model contract, so edit with care.
SYSTEM_PROMPT = """You are Exoskeleton, a method that externalizes reasoning into explicit slots.
When answering, you MUST return a compact JSON object with the following keys:
- "question": the original question or task
- "evidence": a short bullet-style list (as an array of strings) of key facts extracted or retrieved
- "claims": a short bullet-style list (as an array of strings) of your core claims or intermediate conclusions
- "sources": a short bullet-style list (as an array of strings) of any sources or citations if provided in the prompt; otherwise empty
- "final_answer": a single concise answer in plain text

Example JSON (minified):
{"question":"Do bats lay eggs?","evidence":["bats are mammals","most mammals give live birth"],"claims":["bats give live birth"],"sources":[],"final_answer":"No. Bats are mammals and give birth to live young, not eggs."}

Only output JSON. Do NOT include backticks or explanations outside JSON.
"""

USER_TEMPLATE = """Question:
{question}

Return only the JSON with keys: question, evidence, claims, sources, final_answer.
"""
41
+
42
# -----------------------------
# Model Loading
# -----------------------------
# Process-wide cache so repeated requests reuse the loaded weights.
_tokenizer = None
_model = None
_loaded_model_id = None  # id the cached pair was built from


def load_model(model_id: str = DEFAULT_MODEL):
    """Return a ``(tokenizer, model)`` pair for *model_id*, cached per process.

    Fixes two issues in the original:
    - The cache ignored *model_id*, so entering a different "Model ID" in the
      UI silently kept serving the previously loaded model. The cache is now
      keyed on the id and reloads when it changes.
    - ``use_auth_token=`` is deprecated in recent transformers; ``token=`` is
      the supported argument.
    """
    global _tokenizer, _model, _loaded_model_id
    if _model is not None and _tokenizer is not None and _loaded_model_id == model_id:
        return _tokenizer, _model

    # Treat an empty/whitespace HF_TOKEN as "no token".
    auth = USE_AUTH_TOKEN if (USE_AUTH_TOKEN and USE_AUTH_TOKEN.strip()) else None
    _tokenizer = AutoTokenizer.from_pretrained(
        model_id, token=auth, trust_remote_code=TRUST_REMOTE_CODE
    )
    _model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # fp16 on GPU, fp32 on CPU (fp16 CPU inference is poorly supported).
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map=DEVICE_MAP,
        token=auth,
        trust_remote_code=TRUST_REMOTE_CODE,
    )
    _loaded_model_id = model_id
    return _tokenizer, _model
63
+
64
# -----------------------------
# Generation
# -----------------------------
def format_prompt(question: str, system_prompt: str = SYSTEM_PROMPT) -> str:
    """Build the full model prompt: system instructions + filled user template."""
    user_part = USER_TEMPLATE.format(question=question.strip())
    return f"{system_prompt}\n\n{user_part}".strip()
69
+
70
+ def generate_json(question: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str) -> Tuple[str, Dict]:
71
+ tokenizer, model = load_model(model_id)
72
+ prompt = format_prompt(question)
73
+
74
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
75
+ with torch.no_grad():
76
+ output_ids = model.generate(
77
+ **inputs,
78
+ do_sample=True if temperature > 0 else False,
79
+ temperature=temperature,
80
+ top_p=top_p,
81
+ max_new_tokens=max_new_tokens,
82
+ pad_token_id=tokenizer.eos_token_id,
83
+ )
84
+ text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
85
+
86
+ # Heuristic: the model might echo the prompt. Try to extract the last JSON object.
87
+ json_text = text.split("{")[-1]
88
+ json_text = "{" + json_text
89
+ # Cut to last closing brace
90
+ last_brace = json_text.rfind("}")
91
+ if last_brace != -1:
92
+ json_text = json_text[: last_brace + 1]
93
+
94
+ # Parse or fallback
95
+ parsed = {}
96
+ try:
97
+ parsed = json.loads(json_text)
98
+ except Exception:
99
+ parsed = {
100
+ "question": question,
101
+ "evidence": [],
102
+ "claims": [],
103
+ "sources": [],
104
+ "final_answer": text.strip()
105
+ }
106
+ json_text = json.dumps(parsed, ensure_ascii=False)
107
+
108
+ return json_text, parsed
109
+
110
# -----------------------------
# Gradio UI
# -----------------------------
# Canned prompts surfaced in the "Quick prompts" dropdown; PRESETS[0] is also
# used as the textbox placeholder. (Exact strings are user-facing — keep as-is.)
PRESETS = [
    "Using the exoskeleton, answer: Do bats lay eggs? Provide 2 sources.",
    "Fact‑check: \"Coffee stunts growth.\" Return your claims and supporting/contradicting sources.",
    "Summarize this text and extract facts/claims/sources into the skeleton: Paste text here...",
]
118
+
119
def infer(question, temperature, top_p, max_new_tokens, model_id):
    """Gradio click handler: generate and shape output for both result panels.

    Returns ``(display_dict, raw_json_text)`` — the structured view and the
    raw JSON string. Empty input raises a UI warning and returns blanks.
    """
    if not (question and question.strip()):
        gr.Warning("Please enter a question or paste text.")
        return {}, "{}"

    json_text, parsed = generate_json(question, temperature, top_p, max_new_tokens, model_id)

    # Map JSON slots onto the labels shown in the structured panel.
    slots = [
        ("Question", "question", ""),
        ("Evidence", "evidence", []),
        ("Claims", "claims", []),
        ("Sources", "sources", []),
        ("Final Answer", "final_answer", ""),
    ]
    display = {label: parsed.get(key, default) for label, key, default in slots}
    return display, json_text
134
+
135
# UI layout: left column holds the input textbox, sampling sliders, model-id
# box, and preset picker; right column shows the parsed structured view and
# the raw JSON. Built at import time so Spaces can find `demo`.
with gr.Blocks(title="Exoskeleton Reasoning — Demo", css=".small {font-size: 0.85rem}") as demo:
    gr.Markdown(
        """
        # Exoskeleton Reasoning — Live Demo
        Externalize reasoning into explicit **slots**: Evidence → Claims → Sources → Final Answer.
        \n**Model:** set `EXOSKELETON_MODEL_ID` (default: `Inpris/humains-junior`). If gated, add your HF token as a Space secret `HF_TOKEN`.
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Input side: question + sampling controls.
            q = gr.Textbox(label="Your question / task", placeholder=PRESETS[0], lines=6)
            with gr.Row():
                temp = gr.Slider(0.0, 1.2, value=TEMPERATURE, step=0.05, label="Temperature")
                topp = gr.Slider(0.1, 1.0, value=TOP_P, step=0.05, label="Top‑p")
            with gr.Row():
                max_new = gr.Slider(64, 1024, value=MAX_NEW_TOKENS, step=16, label="Max new tokens")
                model_id = gr.Textbox(label="Model ID", value=DEFAULT_MODEL)
            with gr.Row():
                run = gr.Button("Run", variant="primary")
                preset = gr.Dropdown(choices=PRESETS, value=PRESETS[0], label="Quick prompts")
            gr.Markdown(
                'Tip: Add Space secret **HF_TOKEN** if the model is gated · Set `DEVICE_MAP="auto"` in **Variables**'
            )
        with gr.Column(scale=4):
            # Output side: parsed slots (open) and raw JSON (collapsed).
            with gr.Accordion("Exoskeleton Panel (structured view)", open=True):
                exo = gr.JSON(label="Structured reasoning output (parsed)")
            with gr.Accordion("Raw JSON output", open=False):
                raw = gr.Code(label="Raw JSON", value="{}", language="json")

    # Selecting a preset copies it into the question textbox.
    def use_preset(p):
        return p
    preset.change(fn=use_preset, inputs=preset, outputs=q)
    run.click(fn=infer, inputs=[q, temp, topp, max_new, model_id], outputs=[exo, raw])
167
+
168
if __name__ == "__main__":
    # Warm start: load the default model before serving so the first request
    # does not also pay the (potentially long) model-load latency.
    load_model(DEFAULT_MODEL)
    demo.launch()
examples/presets.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "presets": [
3
+ "Using the exoskeleton, answer: Do bats lay eggs? Provide 2 sources.",
4
+ "Fact‑check: \"Coffee stunts growth.\" Return your claims and supporting/contradicting sources.",
5
+ "Summarize this text and extract facts/claims/sources into the skeleton: Paste text here..."
6
+ ]
7
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ transformers>=4.44.0
3
+ accelerate>=0.33.0
4
+ torch
5
+ sentencepiece