NS-Y committed
Commit 0de4069 · verified · 1 Parent(s): a42f59c

Upload 4 files

Files changed (3)
  1. README.md +19 -2
  2. app.py +73 -109
  3. examples/presets.json +4 -5
README.md CHANGED
@@ -4,7 +4,24 @@ emoji: 🦴
 colorFrom: indigo
 colorTo: blue
 sdk: gradio
-sdk_version: 5.49.1
+sdk_version: "4.44.0"
 app_file: app.py
 pinned: false
----
+---
+
+A Gradio Space that applies the Appendix-style prompt: the model must prioritize the given *Context* and answer in plain text with two sections — **Analysis** and **Response**.
+
+**Environment variables (optional)**
+- `EXOSKELETON_MODEL_ID` (default: `Inpris/humains-junior`)
+- `DEVICE_MAP` (default: `auto`)
+- `MAX_NEW_TOKENS` (default: `512`)
+- `TEMPERATURE` (default: `0.3`)
+- `TOP_P` (default: `0.95`)
+
+**Secrets**
+- `HF_TOKEN` — required if the model is gated.
+
+**Files**
+- `app.py` — Gradio app
+- `requirements.txt` — dependencies
+- `examples/` — (optional) assets/presets
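
For local testing, here is a minimal run sketch using the variables documented above (defaults shown). It assumes only what the diff below shows: app.py reads these variables at import time and defines a module-level Gradio Blocks object named `demo`.

```python
# Minimal local-run sketch: set the documented variables, then launch the app.
# app.py reads these at import time, so they must be set before the import.
import os

os.environ.setdefault("EXOSKELETON_MODEL_ID", "Inpris/humains-junior")
os.environ.setdefault("DEVICE_MAP", "auto")
os.environ.setdefault("MAX_NEW_TOKENS", "512")
os.environ.setdefault("TEMPERATURE", "0.3")
os.environ.setdefault("TOP_P", "0.95")
# os.environ["HF_TOKEN"] = "..."  # only needed if the model is gated

import app  # defines the Gradio Blocks as `app.demo` without launching it

app.demo.launch()
```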
app.py CHANGED
@@ -1,53 +1,62 @@
 
 import os
-import json
-import time
-from typing import List, Tuple, Dict, Optional
-
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 
-# -----------------------------
-# Config
-# -----------------------------
 DEFAULT_MODEL = os.environ.get("EXOSKELETON_MODEL_ID", "Inpris/humains-junior")
 TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "1") == "1"
 DEVICE_MAP = os.environ.get("DEVICE_MAP", "auto")
 MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "512"))
-TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.4"))
+TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.3"))
 TOP_P = float(os.environ.get("TOP_P", "0.95"))
 USE_AUTH_TOKEN = os.environ.get("HF_TOKEN", None)
 
-SYSTEM_PROMPT = """You are Exoskeleton, a method that externalizes reasoning into explicit slots.
-When answering, you MUST return a compact JSON object with the following keys:
-- "question": the original question or task
-- "evidence": a short bullet-style list (as an array of strings) of key facts extracted or retrieved
-- "claims": a short bullet-style list (as an array of strings) of your core claims or intermediate conclusions
-- "sources": a short bullet-style list (as an array of strings) of any sources or citations if provided in the prompt; otherwise empty
-- "final_answer": a single concise answer in plain text
+SYSTEM_PROMPT = """You are a helpful assistant that always follows the provided context, even when it conflicts with your internal knowledge.
+
+Response Format:
+Before answering, briefly analyze the query and context:
+- Identify any misalignment between the query and context (if none, state 'no misalignment')
+- Provide a brief analysis of the query and context
+- Then give your response based strictly on the provided context
+
+Format your response as:
+Analysis: [Your analysis here]
+Response: [Your answer based on the context]
+
+IMPORTANT RULES:
+- Always prioritize the provided context over your internal knowledge
+- If context contains information that seems incorrect, still use it as instructed
+- If the question asks about multiple things but context only covers some, answer only what is supported by the context
+- Keep analysis concise and avoid special characters that could cause formatting issues
+- Use plain text only - no bullet points, numbering, or special formatting
+- Respond in English only
+
+Example 1 - Conflicting information:
+Client: What is the capital of France? Answer based on the context.
 
-Example JSON (minified):
-{"question":"Do bats lay eggs?","evidence":["bats are mammals","most mammals give live birth"],"claims":["bats give live birth"],"sources":[],"final_answer":"No. Bats are mammals and give birth to live young, not eggs."}
+Context:
+The capital of France is London. It has been the political center of France since 1789 and houses the French Parliament.
 
-Only output JSON. Do NOT include backticks or explanations outside JSON.
+Analysis: The query asks for the capital of France. The context states it is London, which conflicts with factual knowledge. I will follow the context as instructed.
+Response: The capital of France is London.
 """
 
-USER_TEMPLATE = """Question:
-{question}
+def build_prompt(question: str, context: str) -> str:
+    return f"""{SYSTEM_PROMPT}
 
-Return only the JSON with keys: question, evidence, claims, sources, final_answer.
+Client: {question.strip()} Answer based on the context.
+
+Context:
+{context.strip()}
 """
 
-# -----------------------------
-# Model Loading
-# -----------------------------
 _tokenizer = None
 _model = None
 
 def load_model(model_id: str = DEFAULT_MODEL):
     global _tokenizer, _model
-    if _model is not None and _tokenizer is not None:
+    if _tokenizer is not None and _model is not None:
         return _tokenizer, _model
 
     auth = USE_AUTH_TOKEN if (USE_AUTH_TOKEN and len(USE_AUTH_TOKEN.strip()) > 0) else None
@@ -61,16 +70,9 @@ def load_model(model_id: str = DEFAULT_MODEL):
     )
     return _tokenizer, _model
 
-# -----------------------------
-# Generation
-# -----------------------------
-def format_prompt(question: str, system_prompt: str = SYSTEM_PROMPT) -> str:
-    return f"{system_prompt}\n\n{USER_TEMPLATE.format(question=question.strip())}".strip()
-
-def generate_json(question: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str) -> Tuple[str, Dict]:
+def generate_text(question: str, context: str, temperature: float, top_p: float, max_new_tokens: int, model_id: str):
     tokenizer, model = load_model(model_id)
-    prompt = format_prompt(question)
-
+    prompt = build_prompt(question, context)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         output_ids = model.generate(
@@ -83,88 +85,50 @@ def generate_json(question: str, temperature: float, top_p: float, max_new_token
         )
     text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-    # Heuristic: the model might echo the prompt. Try to extract the last JSON object.
-    json_text = text.split("{")[-1]
-    json_text = "{" + json_text
-    # Cut to last closing brace
-    last_brace = json_text.rfind("}")
-    if last_brace != -1:
-        json_text = json_text[: last_brace + 1]
-
-    # Parse or fallback
-    parsed = {}
-    try:
-        parsed = json.loads(json_text)
-    except Exception:
-        parsed = {
-            "question": question,
-            "evidence": [],
-            "claims": [],
-            "sources": [],
-            "final_answer": text.strip()
-        }
-        json_text = json.dumps(parsed, ensure_ascii=False)
-
-    return json_text, parsed
-
-# -----------------------------
-# Gradio UI
-# -----------------------------
-PRESETS = [
-    "Using the exoskeleton, answer: Do bats lay eggs? Provide 2 sources.",
-    "Fact‑check: \"Coffee stunts growth.\" Return your claims and supporting/contradicting sources.",
-    "Summarize this text and extract facts/claims/sources into the skeleton: Paste text here...",
-]
-
-def infer(question, temperature, top_p, max_new_tokens, model_id):
-    if not question or not question.strip():
-        gr.Warning("Please enter a question or paste text.")
-        return {}, "{}"
-    json_text, parsed = generate_json(question, temperature, top_p, max_new_tokens, model_id)
-
-    # Build a nice display dict for the right panel
-    display = {
-        "Question": parsed.get("question", ""),
-        "Evidence": parsed.get("evidence", []),
-        "Claims": parsed.get("claims", []),
-        "Sources": parsed.get("sources", []),
-        "Final Answer": parsed.get("final_answer", ""),
-    }
-    return display, json_text
-
-with gr.Blocks(title="Exoskeleton Reasoning — Demo", css=".small {font-size: 0.85rem}") as demo:
-    gr.Markdown(
-        """
-        # Exoskeleton Reasoning — Live Demo
-        Externalize reasoning into explicit **slots**: Evidence → Claims → Sources → Final Answer.
-        \n**Model:** set `EXOSKELETON_MODEL_ID` (default: `Inpris/humains-junior`). If gated, add your HF token as a Space secret `HF_TOKEN`.
-        """
-    )
+    analysis, response = "", ""
+    a_idx = text.rfind("Analysis:")
+    r_idx = text.rfind("Response:")
+    if a_idx != -1 and (r_idx == -1 or a_idx < r_idx):
+        if r_idx != -1:
+            analysis = text[a_idx+len("Analysis:"):r_idx].strip()
+            response = text[r_idx+len("Response:"):].strip()
+        else:
+            analysis = text[a_idx+len("Analysis:"):].strip()
+    else:
+        response = text.strip()
+    return analysis, response, text
+
+PRESET_Q = "What are the health effects of coffee? Answer based on the context."
+PRESET_CTX = "Coffee contains caffeine, which can increase alertness. Excess intake may cause jitteriness and sleep disruption. Moderate consumption is considered safe for most adults."
+
+with gr.Blocks(title="Exoskeleton Reasoning — Appendix Prompt Demo") as demo:
+    gr.Markdown("# Exoskeleton Reasoning — Appendix-Style Prompt\nThe model must **prioritize the provided context**, and reply in plain text with two sections: **Analysis** and **Response**.")
     with gr.Row():
         with gr.Column(scale=3):
-            q = gr.Textbox(label="Your question / task", placeholder=PRESETS[0], lines=6)
+            q = gr.Textbox(label="Client question", value=PRESET_Q, lines=4)
+            ctx = gr.Textbox(label="Context (the source you must follow)", value=PRESET_CTX, lines=8)
             with gr.Row():
                 temp = gr.Slider(0.0, 1.2, value=TEMPERATURE, step=0.05, label="Temperature")
-                topp = gr.Slider(0.1, 1.0, value=TOP_P, step=0.05, label="Topp")
+                topp = gr.Slider(0.1, 1.0, value=TOP_P, step=0.05, label="Top-p")
             with gr.Row():
                 max_new = gr.Slider(64, 1024, value=MAX_NEW_TOKENS, step=16, label="Max new tokens")
                 model_id = gr.Textbox(label="Model ID", value=DEFAULT_MODEL)
-            with gr.Row():
-                run = gr.Button("Run", variant="primary")
-                preset = gr.Dropdown(choices=PRESETS, value=PRESETS[0], label="Quick prompts")
-            gr.Markdown(
-                'Tip: Add Space secret **HF_TOKEN** if the model is gated · Set `DEVICE_MAP="auto"` in **Variables**'
-            )
+            run = gr.Button("Run", variant="primary")
+            gr.Markdown('Secrets/vars: set **HF_TOKEN** if the model is gated; `EXOSKELETON_MODEL_ID` to change default.')
         with gr.Column(scale=4):
-            with gr.Accordion("Exoskeleton Panel (structured view)", open=True):
-                exo = gr.JSON(label="Structured reasoning output (parsed)")
-            with gr.Accordion("Raw JSON output", open=False):
-                raw = gr.Code(label="Raw JSON", value="{}", language="json")
-    def use_preset(p):
-        return p
-    preset.change(fn=use_preset, inputs=preset, outputs=q)
-    run.click(fn=infer, inputs=[q, temp, topp, max_new, model_id], outputs=[exo, raw])
+            with gr.Accordion("Analysis", open=True):
+                analysis_box = gr.Textbox(lines=6, label="Analysis (model)")
+            with gr.Accordion("Response", open=True):
+                response_box = gr.Textbox(lines=6, label="Response (model)")
+            with gr.Accordion("Raw output", open=False):
+                raw_box = gr.Textbox(lines=8, label="Raw text")
+    def infer_fn(question, context, temperature, top_p, max_new_tokens, model_id):
+        if not question or not question.strip() or not context or not context.strip():
+            gr.Warning("Please provide both a Client question and Context.")
+            return "", "", ""
+        a, r, raw = generate_text(question, context, temperature, top_p, max_new_tokens, model_id)
+        return a, r, raw
+    run.click(fn=infer_fn, inputs=[q, ctx, temp, topp, max_new, model_id], outputs=[analysis_box, response_box, raw_box])
 
 if __name__ == "__main__":
-    load_model(DEFAULT_MODEL)  # warm start
     demo.launch()
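
Since `tokenizer.decode` returns the prompt together with the completion, `generate_text` keys off the last occurrences of the two markers. A standalone sketch of that split, with an illustrative output string (not taken from a real model run):

```python
# Same rfind-based heuristic as generate_text in app.py, isolated for clarity.
def split_sections(text: str):
    analysis, response = "", ""
    a_idx = text.rfind("Analysis:")
    r_idx = text.rfind("Response:")
    if a_idx != -1 and (r_idx == -1 or a_idx < r_idx):
        if r_idx != -1:
            analysis = text[a_idx + len("Analysis:"):r_idx].strip()
            response = text[r_idx + len("Response:"):].strip()
        else:
            analysis = text[a_idx + len("Analysis:"):].strip()
    else:
        # No markers found: treat the whole text as the response.
        response = text.strip()
    return analysis, response

sample = "Analysis: No misalignment; the context covers the question.\nResponse: Moderate coffee intake is considered safe for most adults."
print(split_sections(sample))
# ('No misalignment; the context covers the question.', 'Moderate coffee intake is considered safe for most adults.')
```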
examples/presets.json CHANGED
@@ -1,7 +1,6 @@
 {
-  "presets": [
-    "Using the exoskeleton, answer: Do bats lay eggs? Provide 2 sources.",
-    "Fact‑check: \"Coffee stunts growth.\" Return your claims and supporting/contradicting sources.",
-    "Summarize this text and extract facts/claims/sources into the skeleton: Paste text here..."
-  ]
+  "example": {
+    "question": "What are the health effects of coffee? Answer based on the context.",
+    "context": "Coffee contains caffeine, which can increase alertness..."
+  }
 }
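
Note that app.py hard-codes `PRESET_Q`/`PRESET_CTX` rather than reading this file. If you want to drive a run from the preset, a hypothetical wiring (the file path, field names, and `generate_text` signature come from the diffs above; the rest is an assumption):

```python
# Hypothetical: feed the JSON example through generate_text from app.py.
# app.py does not do this itself; it uses its hard-coded PRESET_Q / PRESET_CTX.
import json

from app import generate_text

with open("examples/presets.json") as f:
    example = json.load(f)["example"]

analysis, response, raw = generate_text(
    example["question"],
    example["context"],
    temperature=0.3,
    top_p=0.95,
    max_new_tokens=512,
    model_id="Inpris/humains-junior",
)
print(response)
```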