vicgalle-gpt2-open-instruct-v1

Sleeping

App Files Community

kairusama committed on Oct 13

Commit

24ddd2a

verified ·

1 Parent(s): 75ea43c

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -15

app.py CHANGED Viewed

@@ -1,13 +1,31 @@
 # app.py
 import gradio as gr
-from transformers import pipeline
 # ---- Load model via pipeline ----
 MODEL_NAME = "vicgalle/gpt2-open-instruct-v1"
 pipe = pipeline("text-generation", model=MODEL_NAME, device_map="auto")
 # ---- Inference function ----
-def generate_response(instruction, max_new_tokens=150, temperature=0.7, top_k=50, top_p=0.9):
     system_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
@@ -15,34 +33,48 @@ def generate_response(instruction, max_new_tokens=150, temperature=0.7, top_k=50
 ### Response:
 """
-    output = pipe(
         system_prompt,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        top_k=top_k,
-        top_p=top_p,
         do_sample=True,
         pad_token_id=pipe.tokenizer.eos_token_id,
     )
-    # Clean up output text
-    text = output[0]["generated_text"]
-    return text.split("### Response:")[-1].strip()
 # ---- Gradio UI ----
 with gr.Blocks() as demo:
-    gr.Markdown("# 🛸 GPT-2 Open Instruct Playground\nType an instruction and let the alien respond!")
     with gr.Row():
-        with gr.Column(scale=3):
-            instruction = gr.Textbox(label="Instruction", placeholder="Pretend you are an alien visiting Earth...", lines=6)
             max_new_tokens = gr.Slider(50, 500, value=150, step=10, label="Max new tokens")
             temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
             top_k = gr.Slider(10, 100, value=50, step=5, label="Top-K sampling")
             top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P (nucleus) sampling")
             generate_btn = gr.Button("Generate ✨")
-        with gr.Column(scale=2):
-            output_box = gr.Textbox(label="Model Output", lines=10)
     generate_btn.click(generate_response, [instruction, max_new_tokens, temperature, top_k, top_p], output_box)
 # ---- Launch ----
 if __name__ == "__main__":
     demo.launch()

 # app.py
 import gradio as gr
+import torch
+from transformers import pipeline, StoppingCriteria, StoppingCriteriaList
 # ---- Load model via pipeline ----
 MODEL_NAME = "vicgalle/gpt2-open-instruct-v1"
+class StopOnStrings(StoppingCriteria):
+    """Stopping criterion that halts generation when a stop sequence appears.
+
+    Generation stops as soon as the most recent tokens of the first sequence
+    in the batch equal any one of the supplied stop sequences.
+
+    Args:
+        stop_ids: Iterable of 1-D token-id tensors, one per stop sequence.
+        window: Stored on the instance but not consulted by ``__call__``;
+            kept so callers that already pass it continue to work.
+    """
+    def __init__(self, stop_ids, window=10):
+        super().__init__()
+        self.stop_ids = stop_ids
+        self.window = window
+    def __call__(self, input_ids, scores, **kwargs):
+        """Return True if the first sequence currently ends with a stop sequence."""
+        generated = input_ids[0]
+        for stop in self.stop_ids:
+            if len(generated) < len(stop):
+                continue
+            # The stop tensors are built by the tokenizer (CPU) while the model
+            # may run on another device (the pipeline uses device_map="auto");
+            # torch.equal requires both operands on the same device.
+            stop = stop.to(generated.device)
+            if torch.equal(generated[-len(stop):], stop):
+                return True
+        return False
 pipe = pipeline("text-generation", model=MODEL_NAME, device_map="auto")
 # ---- Inference function ----
+def generate_response(instruction,
+                      max_new_tokens=150,
+                      temperature=0.7,
+                      top_p=0.9):
     system_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
 ### Response:
 """
+    # Build stop ids for "### End"
+    stop_text = "### End"
+    stop_ids = pipe.tokenizer(stop_text, add_special_tokens=False, return_tensors="pt")["input_ids"][0]
+    stopping = StoppingCriteriaList([StopOnStrings([stop_ids])])
+    out = pipe(
         system_prompt,
         do_sample=True,
+        temperature=temperature,
+        top_p=top_p,          # prefer one: top_p OR top_k
+        # top_k=50,           # leave this off when using top_p
+        max_new_tokens=max_new_tokens,
+        no_repeat_ngram_size=3,
+        repetition_penalty=1.15,
+        eos_token_id=pipe.tokenizer.eos_token_id,
         pad_token_id=pipe.tokenizer.eos_token_id,
+        return_full_text=False,          # don't echo the prompt
+        stopping_criteria=stopping,
     )
+    text = out[0]["generated_text"]
+    # Hard stop as a second line of defense
+    text = text.split(stop_text)[0].strip()
+    return text
 # ---- Gradio UI ----
 # Layout: a wide column with the instruction and the model output, and a narrow
 # column with the sampling controls and the generate button.
 with gr.Blocks() as demo:
+    gr.Markdown("# 🛸 GPT-2 Open Instruct Playground\nThe original GPT-2 fine-tuned with Open Instruct v1.")
     with gr.Row():
+        with gr.Column(scale=4):
+            instruction = gr.Textbox(label="Instruction", value="What is the capital city of France?", lines=6)
+            output_box = gr.Textbox(label="Model Output", lines=25)
+        with gr.Column(scale=1):
             max_new_tokens = gr.Slider(50, 500, value=150, step=10, label="Max new tokens")
             temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
+            # No Top-K slider: generate_response samples with top_p only and
+            # has no top_k parameter.
             top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P (nucleus) sampling")
             generate_btn = gr.Button("Generate ✨")
+    # The inputs must match generate_response's positional parameters one to
+    # one: (instruction, max_new_tokens, temperature, top_p). Passing a fifth
+    # component makes every click fail with a TypeError.
+    generate_btn.click(generate_response, [instruction, max_new_tokens, temperature, top_p], output_box)
 # ---- Launch ----
 if __name__ == "__main__":
     demo.launch()