NS-Y committed on
Commit
7d02d76
·
verified ·
1 Parent(s): 5ddd643

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +23 -14
  2. app.py +170 -0
  3. examples/presets.json +7 -0
  4. requirements.txt +5 -0
README.md CHANGED
@@ -1,14 +1,23 @@
1
- ---
2
- title: Humains Junior
3
- emoji: 🦀
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 5.49.1
8
- app_file: app.py
9
- pinned: false
10
- license: cc-by-nc-sa-4.0
11
- short_description: A small model for factual grounding
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
1
+ # Exoskeleton Reasoning — Hugging Face Space
2
+
3
+ A zero-setup Gradio demo for **Exoskeleton Reasoning**. It asks the model to return a compact JSON with slots:
4
+ `question, evidence, claims, sources, final_answer` and renders a structured panel + the raw JSON.
5
+
6
+ ## Try locally
7
+ ```bash
8
+ pip install -r requirements.txt
9
+ export EXOSKELETON_MODEL_ID=Inpris/humains-junior # or another compatible instruct model
10
+ export HF_TOKEN=hf_xxx # if the model is gated
11
+ python app.py
12
+ ```
13
+
14
+ ## Space secrets / variables
15
+ - **HF_TOKEN**: (Secret) your access token if the model is gated.
16
+ - **EXOSKELETON_MODEL_ID**: (Variable) defaults to `Inpris/humains-junior`.
17
+ - **DEVICE_MAP**: (Variable) defaults to `auto`. Set to `cuda` on GPU Spaces.
18
+ - **MAX_NEW_TOKENS**: (Variable) default 512.
19
+ - **TEMPERATURE**, **TOP_P**: sampling controls.
20
+
21
+ ## Notes
22
+ - On free CPU Spaces, first token latency can be high. Consider enabling GPU or using a quantized checkpoint.
23
+ - If JSON parsing fails (some models may add prose), the app falls back to showing the raw text as the final answer.
app.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import json
4
+ import time
5
+ from typing import List, Tuple, Dict, Optional
6
+
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
9
+ import gradio as gr
10
+
11
# -----------------------------
# Config
# -----------------------------
# Every runtime knob comes from the environment so Space operators can tune
# the demo (model id, device placement, sampling) without touching the code.
_env = os.environ.get

DEFAULT_MODEL = _env("EXOSKELETON_MODEL_ID", "Inpris/humains-junior")
TRUST_REMOTE_CODE = _env("TRUST_REMOTE_CODE", "1") == "1"  # opt out with "0"
DEVICE_MAP = _env("DEVICE_MAP", "auto")
MAX_NEW_TOKENS = int(_env("MAX_NEW_TOKENS", "512"))
TEMPERATURE = float(_env("TEMPERATURE", "0.4"))
TOP_P = float(_env("TOP_P", "0.95"))
USE_AUTH_TOKEN = _env("HF_TOKEN", None)  # only needed for gated checkpoints
21
+
22
# Prompt scaffolding. SYSTEM_PROMPT pins the JSON schema the model must emit
# (keys: question, evidence, claims, sources, final_answer) and includes a
# minified example; USER_TEMPLATE appends the user's question and restates
# the required keys. Both are runtime strings — their exact wording is part
# of the model contract, so edit with care.
SYSTEM_PROMPT = """You are Exoskeleton, a method that externalizes reasoning into explicit slots.
When answering, you MUST return a compact JSON object with the following keys:
- "question": the original question or task
- "evidence": a short bullet-style list (as an array of strings) of key facts extracted or retrieved
- "claims": a short bullet-style list (as an array of strings) of your core claims or intermediate conclusions
- "sources": a short bullet-style list (as an array of strings) of any sources or citations if provided in the prompt; otherwise empty
- "final_answer": a single concise answer in plain text

Example JSON (minified):
{"question":"Do bats lay eggs?","evidence":["bats are mammals","most mammals give live birth"],"claims":["bats give live birth"],"sources":[],"final_answer":"No. Bats are mammals and give birth to live young, not eggs."}

Only output JSON. Do NOT include backticks or explanations outside JSON.
"""

USER_TEMPLATE = """Question:
{question}

Return only the JSON with keys: question, evidence, claims, sources, final_answer.
"""
41
+
42
# -----------------------------
# Model Loading
# -----------------------------
# Process-wide cache so repeated requests reuse the loaded weights.
_tokenizer = None
_model = None
_loaded_model_id = None  # id the cached pair was built from


def load_model(model_id: str = DEFAULT_MODEL):
    """Return a ``(tokenizer, model)`` pair for *model_id*, cached per process.

    Fixes two issues in the original:
    - The cache ignored *model_id*, so entering a different "Model ID" in the
      UI silently kept serving the previously loaded model. The cache is now
      keyed on the id and reloads when it changes.
    - ``use_auth_token=`` is deprecated in recent transformers; ``token=`` is
      the supported argument.
    """
    global _tokenizer, _model, _loaded_model_id
    if _model is not None and _tokenizer is not None and _loaded_model_id == model_id:
        return _tokenizer, _model

    # Treat an empty/whitespace HF_TOKEN as "no token".
    auth = USE_AUTH_TOKEN if (USE_AUTH_TOKEN and USE_AUTH_TOKEN.strip()) else None
    _tokenizer = AutoTokenizer.from_pretrained(
        model_id, token=auth, trust_remote_code=TRUST_REMOTE_CODE
    )
    _model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # fp16 on GPU, fp32 on CPU (fp16 CPU inference is poorly supported).
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map=DEVICE_MAP,
        token=auth,
        trust_remote_code=TRUST_REMOTE_CODE,
    )
    _loaded_model_id = model_id
    return _tokenizer, _model
63
+
64
# -----------------------------
# Generation
# -----------------------------
def format_prompt(question: str, system_prompt: str = SYSTEM_PROMPT) -> str:
    """Build the full model prompt: system instructions + filled user template."""
    user_part = USER_TEMPLATE.format(question=question.strip())
    return f"{system_prompt}\n\n{user_part}".strip()
69
+
70
+ def generate_json(question: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str) -> Tuple[str, Dict]:
71
+ tokenizer, model = load_model(model_id)
72
+ prompt = format_prompt(question)
73
+
74
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
75
+ with torch.no_grad():
76
+ output_ids = model.generate(
77
+ **inputs,
78
+ do_sample=True if temperature > 0 else False,
79
+ temperature=temperature,
80
+ top_p=top_p,
81
+ max_new_tokens=max_new_tokens,
82
+ pad_token_id=tokenizer.eos_token_id,
83
+ )
84
+ text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
85
+
86
+ # Heuristic: the model might echo the prompt. Try to extract the last JSON object.
87
+ json_text = text.split("{")[-1]
88
+ json_text = "{" + json_text
89
+ # Cut to last closing brace
90
+ last_brace = json_text.rfind("}")
91
+ if last_brace != -1:
92
+ json_text = json_text[: last_brace + 1]
93
+
94
+ # Parse or fallback
95
+ parsed = {}
96
+ try:
97
+ parsed = json.loads(json_text)
98
+ except Exception:
99
+ parsed = {
100
+ "question": question,
101
+ "evidence": [],
102
+ "claims": [],
103
+ "sources": [],
104
+ "final_answer": text.strip()
105
+ }
106
+ json_text = json.dumps(parsed, ensure_ascii=False)
107
+
108
+ return json_text, parsed
109
+
110
# -----------------------------
# Gradio UI
# -----------------------------
# Canned prompts surfaced in the "Quick prompts" dropdown; PRESETS[0] is also
# used as the textbox placeholder. (Exact strings are user-facing — keep as-is.)
PRESETS = [
    "Using the exoskeleton, answer: Do bats lay eggs? Provide 2 sources.",
    "Fact‑check: \"Coffee stunts growth.\" Return your claims and supporting/contradicting sources.",
    "Summarize this text and extract facts/claims/sources into the skeleton: Paste text here...",
]
118
+
119
def infer(question, temperature, top_p, max_new_tokens, model_id):
    """Gradio click handler: generate and shape output for both result panels.

    Returns ``(display_dict, raw_json_text)`` — the structured view and the
    raw JSON string. Empty input raises a UI warning and returns blanks.
    """
    if not (question and question.strip()):
        gr.Warning("Please enter a question or paste text.")
        return {}, "{}"

    json_text, parsed = generate_json(question, temperature, top_p, max_new_tokens, model_id)

    # Map JSON slots onto the labels shown in the structured panel.
    slots = [
        ("Question", "question", ""),
        ("Evidence", "evidence", []),
        ("Claims", "claims", []),
        ("Sources", "sources", []),
        ("Final Answer", "final_answer", ""),
    ]
    display = {label: parsed.get(key, default) for label, key, default in slots}
    return display, json_text
134
+
135
# UI layout: left column holds the input textbox, sampling sliders, model-id
# box, and preset picker; right column shows the parsed structured view and
# the raw JSON. Built at import time so Spaces can find `demo`.
with gr.Blocks(title="Exoskeleton Reasoning — Demo", css=".small {font-size: 0.85rem}") as demo:
    gr.Markdown(
        """
        # Exoskeleton Reasoning — Live Demo
        Externalize reasoning into explicit **slots**: Evidence → Claims → Sources → Final Answer.
        \n**Model:** set `EXOSKELETON_MODEL_ID` (default: `Inpris/humains-junior`). If gated, add your HF token as a Space secret `HF_TOKEN`.
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Input side: question + sampling controls.
            q = gr.Textbox(label="Your question / task", placeholder=PRESETS[0], lines=6)
            with gr.Row():
                temp = gr.Slider(0.0, 1.2, value=TEMPERATURE, step=0.05, label="Temperature")
                topp = gr.Slider(0.1, 1.0, value=TOP_P, step=0.05, label="Top‑p")
            with gr.Row():
                max_new = gr.Slider(64, 1024, value=MAX_NEW_TOKENS, step=16, label="Max new tokens")
                model_id = gr.Textbox(label="Model ID", value=DEFAULT_MODEL)
            with gr.Row():
                run = gr.Button("Run", variant="primary")
                preset = gr.Dropdown(choices=PRESETS, value=PRESETS[0], label="Quick prompts")
            gr.Markdown(
                'Tip: Add Space secret **HF_TOKEN** if the model is gated · Set `DEVICE_MAP="auto"` in **Variables**'
            )
        with gr.Column(scale=4):
            # Output side: parsed slots (open) and raw JSON (collapsed).
            with gr.Accordion("Exoskeleton Panel (structured view)", open=True):
                exo = gr.JSON(label="Structured reasoning output (parsed)")
            with gr.Accordion("Raw JSON output", open=False):
                raw = gr.Code(label="Raw JSON", value="{}", language="json")

    # Selecting a preset copies it into the question textbox.
    def use_preset(p):
        return p
    preset.change(fn=use_preset, inputs=preset, outputs=q)
    run.click(fn=infer, inputs=[q, temp, topp, max_new, model_id], outputs=[exo, raw])
167
+
168
if __name__ == "__main__":
    # Warm start: load the default model before serving so the first request
    # does not also pay the (potentially long) model-load latency.
    load_model(DEFAULT_MODEL)
    demo.launch()
examples/presets.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "presets": [
3
+ "Using the exoskeleton, answer: Do bats lay eggs? Provide 2 sources.",
4
+ "Fact‑check: \"Coffee stunts growth.\" Return your claims and supporting/contradicting sources.",
5
+ "Summarize this text and extract facts/claims/sources into the skeleton: Paste text here..."
6
+ ]
7
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ transformers>=4.44.0
3
+ accelerate>=0.33.0
4
+ torch
5
+ sentencepiece