4therapy committed on
Commit
fcf4fcb
·
verified ·
1 Parent(s): 9e353f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -95
app.py CHANGED
@@ -1,102 +1,99 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
- import torch
4
-
5
- # --- 1. Model Configuration ---
6
- # Mistral-7B-Instruct-v0.2 is an excellent model for analytical, instruction-following tasks.
7
- # Using 'Auto' classes is robust, and 'device_map="auto"' ensures maximum GPU utilization.
8
- MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
9
-
10
- try:
11
- # Load Model and Tokenizer
12
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
13
- model = AutoModelForCausalLM.from_pretrained(
14
- MODEL_ID,
15
- # Load in 8-bit or 4-bit to save VRAM, crucial for a free/small GPU Space
16
- load_in_8bit=True,
17
- # If 8-bit fails, try using torch.bfloat16 or torch.float16 if your GPU supports it
18
- torch_dtype=torch.float16,
19
- device_map="auto"
20
- )
21
-
22
- # Create the text generation pipeline for ease of use
23
- generator = pipeline(
24
- "text-generation",
25
- model=model,
26
- tokenizer=tokenizer,
27
- device_map="auto"
28
- )
29
-
30
- except Exception as e:
31
- # Fallback for local testing or very limited CPU-only environments
32
- print(f"Error loading large model: {e}. Falling back to 'gpt2' for basic functionality.")
33
- generator = pipeline("text-generation", model="gpt2")
34
- # For a boxing analyst, a small model is highly inaccurate, so deployment in a GPU Space is critical.
35
-
36
- # --- 2. System Prompt for Persona and Accuracy (CRITICAL) ---
37
- # A highly specific and authoritative system prompt forces the model into the desired persona.
38
- SYSTEM_PROMPT = (
39
- "You are BOXTRON-AI, a world-class professional boxing analyst with decades of experience. "
40
- "Your analysis must be objective, highly detailed, and use correct boxing terminology (e.g., in-fighter, outside fighter, high guard, check hook, weight division, KO ratio). "
41
- "Analyze fighter styles, physical attributes, recent performance, and matchup dynamics. "
42
- "Be concise, insightful, and offer a clear, reasoned prediction for the outcome."
43
- )
44
 
45
- # --- 3. Optimized Prediction Function ---
46
- def analyze_fight(user_message, history):
47
- # Construct the full conversation history including the system prompt
48
- messages = [{"role": "system", "content": SYSTEM_PROMPT}]
49
-
50
- # Add previous conversation history
51
- for human, bot in history:
52
- messages.append({"role": "user", "content": human})
53
- messages.append({"role": "assistant", "content": bot})
54
 
55
- # Add the current user message
56
- messages.append({"role": "user", "content": user_message})
57
-
58
- # Apply the model's specific chat template (crucial for instruct models like Mistral)
59
- prompt = generator.tokenizer.apply_chat_template(
60
- messages,
61
- tokenize=False,
62
- add_generation_prompt=True
63
- )
64
-
65
- # Generate the response with optimized parameters for factual, analytical output
66
- # temperature=0.1: Low temperature reduces randomness (hallucination) and increases factual consistency.
67
- # top_p=0.9: Limits sampling to the most probable tokens for coherent, focused analysis.
68
- response = generator(
69
- prompt,
70
- max_new_tokens=512,
71
- do_sample=True,
72
- temperature=0.1,
73
- top_p=0.9,
74
- return_full_text=False # Ensures only the model's response is returned
75
- )
76
-
77
- # Extract the generated text.
78
- # The output format for the pipeline with return_full_text=False is a list of dicts.
79
- bot_response = response[0]['generated_text'].strip()
80
-
81
- return bot_response
82
-
83
- # --- 4. Gradio Interface ---
84
- # The gr.ChatInterface is the standard and simplest way to build a chat UI.
85
- demo = gr.ChatInterface(
86
- fn=analyze_fight,
87
- title="🥊 BOXTRON-AI: Elite Boxing Analyst Chatbot",
88
- description="Ask me to break down an upcoming fight, analyze a fighter's legacy, or detail specific boxing techniques.",
89
-
90
- # Set the history to be visible, and optionally customize the appearance
91
- chatbot=gr.Chatbot(height=500),
92
- theme="soft", # A modern theme
93
- examples=[
94
- ["Analyze the potential fight between Terence Crawford and Errol Spence Jr."],
95
- ["What are the strengths and weaknesses of Canelo Alvarez's fighting style?"],
96
- ["Predict the outcome of the next Heavyweight title fight."],
97
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  )
99
 
100
- # Launch the app
 
 
 
 
 
101
  if __name__ == "__main__":
102
  demo.launch()
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
+
4
+
5
def _content_to_text(content):
    """Return the plain-text portion of a history entry's ``content``."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): some Gradio versions appear to deliver content as a
        # list of parts such as {"type": "text", "text": ...} -- confirm.
        return "".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    # Anything else (e.g. file attachments) cannot be sent as plain text.
    return ""


def _history_to_messages(history):
    """Convert the chat history into clean ``{"role", "content"}`` messages.

    Each entry is mapped by its own ``role``; any extra keys on the entry are
    dropped so only ``role`` and ``content`` reach the inference API. Entries
    without a ``role`` fall back to the legacy ``user`` / ``message`` keys.
    """
    cleaned = []
    for entry in history or []:
        role = entry.get("role")
        if role in ("user", "assistant"):
            text = _content_to_text(entry.get("content"))
            if text:
                cleaned.append({"role": role, "content": text})
            continue

        # Legacy shape: one dict holding both sides of a turn.
        user_text = entry.get("user")
        bot_text = entry.get("message")
        if user_text:
            cleaned.append({"role": "user", "content": user_text})
        if bot_text:
            cleaned.append({"role": "assistant", "content": bot_text})
    return cleaned


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a chat completion for ``message`` given the prior conversation.

    For more information on `huggingface_hub` Inference API support, please
    check the docs:
    https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

    Args:
        message: The new user message.
        history: Prior turns as dicts with ``role`` and ``content`` keys
            (the interface is created with ``type="messages"``).
        system_message: System prompt placed first in the request.
        max_tokens: Forwarded as ``max_tokens`` to ``chat_completion``.
        temperature: Forwarded as ``temperature`` to ``chat_completion``.
        top_p: Forwarded as ``top_p`` to ``chat_completion``.
        hf_token: OAuth token object; its ``.token`` authenticates the client.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    client = InferenceClient(token=hf_token.token, model="moonshotai/Kimi-K2-Thinking")

    # System prompt first, then the sanitized history, then the new message.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""

    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token = ""
        # Some chunks carry no choices or an empty delta; treat them as "".
        if choices and choices[0].delta.content:
            token = choices[0].delta.content

        response += token
        yield response
53
+
54
+
55
+ """
56
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
57
+ """
58
+
59
# Default persona for the assistant; shown in (and editable from) the
# "System message" textbox below.
system_prompt = """
You are BOXTRON-AI, a highly skilled boxing analyst and professional-level fight judge. Your purpose is to provide objective, deeply informed evaluations of fighters, styles, strategies, and matchups. You score rounds using the official 10-point must system, always basing your judgments on clean and effective punching, effective aggression, ring generalship, and defense. When a round is close, you explain why; when a round is one-sided, you justify the margin. Knockdowns are reflected in the score unless extraordinary circumstances warrant otherwise, and you clearly explain those cases.

Your analysis of fighters must be technical, comprehensive, and grounded in real boxing principles. This includes breaking down fighting styles, strengths, weaknesses, physical attributes, tendencies, and historical performances. You discuss strategy, game plans, match-up dynamics, pace, footwork, punch selection, defense, adaptability, stamina, and the overall ebb and flow of a bout. Whenever you give predictions, you do so logically and without bias, making your reasoning transparent and acknowledging uncertainties.

You maintain strict objectivity at all times. You do not favor particular fighters, trainers, or promotions. You avoid hype and keep your focus on evidence and technical detail. If uncertainty exists, you state that clearly and use probabilities instead of definitive claims.

When asked to predict a fight, you describe multiple plausible scenarios and identify the factors that make each possible. You consider momentum, recent performances, stylistic interactions, weight changes, injury history when reliably documented, defensive responsibility, punch resistance, pace, and likely adjustments. All conclusions are based on established boxing knowledge rather than speculation.

If you are asked to simulate a fight, you proceed round by round, offering realistic and technically grounded descriptions of the action as it unfolds. At the end of each simulated round, you provide an official score and a concise explanation of why the round was scored that way.

You avoid unfounded medical claims, do not present rumors as facts, and label speculation clearly when you use it. Your tone is clear, professional, and analytical, resembling a hybrid of an experienced boxing judge and a seasoned ringside analyst. You use accurate boxing terminology and can provide slow-motion-style breakdowns of exchanges when useful.

You can also find weaknesses in opponents and tell the user how to capitalize on them and defeat their opponent
"""

# Extra controls for the chat UI, listed in the same order as the
# system_message, max_tokens, temperature and top_p parameters of `respond`.
_extra_controls = [
    gr.Textbox(label="System message", value=system_prompt),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        value=0.95,
    ),
]

# Chat UI backed by `respond`, with history in the "messages" format.
chatbot = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=_extra_controls,
)

# Page layout: a login button in the sidebar, the chat UI in the main area.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()


if __name__ == "__main__":
    demo.launch()