"""
🇨🇭 Apertus Swiss AI Transparency Dashboard
Gradio-based HuggingFace Spaces application
"""
import gradio as gr
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import warnings
import os
# Set environment variables to reduce verbosity and warnings
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
warnings.filterwarnings('ignore')
# Global variables for model and tokenizer
model = None
tokenizer = None
def load_model(hf_token):
"""Load Apertus model with HuggingFace token"""
global model, tokenizer
if not hf_token or not hf_token.startswith("hf_"):
return "❌ Invalid HuggingFace token. Must start with 'hf_'"
model_name = "swiss-ai/Apertus-8B-Instruct-2509"
try:
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
model_name,
token=hf_token,
torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
device_map="auto" if torch.cuda.is_available() else "cpu",
low_cpu_mem_usage=True,
output_attentions=True,
output_hidden_states=True,
trust_remote_code=True
)
total_params = sum(p.numel() for p in model.parameters())
memory_usage = torch.cuda.memory_allocated() / 1024**3 if torch.cuda.is_available() else 0
return f"βœ… Model loaded successfully!\nπŸ“Š Parameters: {total_params:,}\nπŸ’Ύ Memory: {memory_usage:.1f} GB" if memory_usage > 0 else f"βœ… Model loaded successfully!\nπŸ“Š Parameters: {total_params:,}\nπŸ’Ύ CPU mode"
except Exception as e:
return f"❌ Failed to load model: {str(e)}\nπŸ’‘ Check your token and model access permissions."
def chat_with_apertus(message, max_tokens=300):
"""Simple chat function"""
global model, tokenizer
if model is None or tokenizer is None:
return "❌ Please load the model first by entering your HuggingFace token."
try:
formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### System:
You are Apertus, a helpful Swiss AI assistant. You are transparent, multilingual, and precise.
### Instruction:
{message}
### Response:
"""
inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=2048)
device = next(model.parameters()).device
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
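# Nucleus sampling: temperature=0.8 mildly sharpens the distribution (values
# below 1.0 concentrate probability mass), and top_p=0.9 samples only from the
# smallest token set covering 90% of the probability mass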
outputs = model.generate(
**inputs,
max_new_tokens=max_tokens,
temperature=0.8,
top_p=0.9,
do_sample=True,
pad_token_id=tokenizer.eos_token_id,
eos_token_id=tokenizer.eos_token_id
)
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
response = full_response.split("### Response:")[-1].strip()
return f"πŸ‡¨πŸ‡­ **Apertus:** {response}"
except Exception as e:
return f"❌ Error: {str(e)}"
def analyze_attention(text, layer=15):
"""Analyze attention patterns"""
global model, tokenizer
if model is None or tokenizer is None:
return None, "❌ Please load the model first."
try:
inputs = tokenizer(text, return_tensors="pt")
tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
device = next(model.parameters()).device
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
outputs = model(**inputs, output_attentions=True)
attention_weights = outputs.attentions[layer][0]
avg_attention = attention_weights.mean(dim=0).cpu()
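# NumPy has no bfloat16 dtype, so upcast to float32 before calling .numpy()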
if avg_attention.dtype == torch.bfloat16:
avg_attention = avg_attention.float()
avg_attention = avg_attention.numpy()
# Create attention heatmap
fig = px.imshow(
avg_attention,
x=tokens,
y=tokens,
color_continuous_scale='Blues',
title=f"Attention Patterns - Layer {layer}",
labels={'color': 'Attention Weight'}
)
fig.update_layout(height=500)
# Get insights
attention_received = avg_attention.sum(axis=0)
top_indices = np.argsort(attention_received)[-3:][::-1]
insights = "**🎯 Top Attended Tokens:**\n\n"
for i, idx in enumerate(top_indices):
if idx < len(tokens):
score = attention_received[idx]
token = tokens[idx]
# Use markdown code blocks to prevent any formatting issues
insights += f"{i+1}. Token: `{token}` β€’ Score: {score:.3f}\n\n"
return fig, insights
except Exception as e:
return None, f"❌ Error analyzing attention: {str(e)}"
def analyze_token_predictions(text):
"""Analyze next token predictions"""
global model, tokenizer
if model is None or tokenizer is None:
return None, "❌ Please load the model first."
try:
inputs = tokenizer(text, return_tensors="pt")
device = next(model.parameters()).device
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
outputs = model(**inputs)
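# The logits at the final position score every vocabulary token as a
# candidate next token; softmax turns them into a probability distribution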
logits = outputs.logits[0, -1, :]
probabilities = torch.nn.functional.softmax(logits, dim=-1)
top_probs, top_indices = torch.topk(probabilities, 10)
# Create prediction data
pred_data = []
for i in range(10):
token_id = top_indices[i].item()
token = tokenizer.decode([token_id])
# Keep original tokens - they show important tokenization info
if not token.strip():
token = f"[ID:{token_id}]"
prob = top_probs[i].item()
pred_data.append({"Rank": i+1, "Token": token, "Probability": prob})
df = pd.DataFrame(pred_data)
fig = px.bar(df, x="Token", y="Probability",
title="Top 10 Most Likely Next Tokens",
color="Probability", color_continuous_scale="viridis")
fig.update_layout(height=400)
# Create insights
insights = "**πŸ† Prediction Details:**\n\n"
for _, row in df.iterrows():
prob_pct = row["Probability"] * 100
confidence = "πŸ”₯" if prob_pct > 20 else "βœ…" if prob_pct > 5 else "⚠️"
confidence_text = "Very confident" if prob_pct > 20 else "Confident" if prob_pct > 5 else "Uncertain"
token = str(row['Token'])
# Use markdown code blocks to prevent formatting issues
insights += f"{row['Rank']}. Token: `{token}` β€’ {prob_pct:.1f}% {confidence} ({confidence_text})\n\n"
return fig, insights
except Exception as e:
return None, f"❌ Error analyzing predictions: {str(e)}"
def analyze_layer_evolution(text):
"""Analyze how representations evolve through layers"""
global model, tokenizer
if model is None or tokenizer is None:
return None, "❌ Please load the model first."
try:
inputs = tokenizer(text, return_tensors="pt")
device = next(model.parameters()).device
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
outputs = model(**inputs, output_hidden_states=True)
hidden_states = outputs.hidden_states
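# hidden_states has num_layers + 1 entries: index 0 is the embedding output,
# index i is the output of decoder layer i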
# Sample key layers
sample_layers = [0, 4, 8, 12, 16, 20, 24, 28, 31]
layer_stats = []
for layer_idx in sample_layers:
if layer_idx < len(hidden_states):
layer_state = hidden_states[layer_idx][0]
layer_cpu = layer_state.cpu()
if layer_cpu.dtype == torch.bfloat16:
layer_cpu = layer_cpu.float()
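# Per-token L2 norms summarize how activation magnitude evolves through the stack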
l2_norms = torch.norm(layer_cpu, dim=-1)
layer_stats.append({
"Layer": layer_idx,
"L2_Norm_Mean": l2_norms.mean().item(),
"L2_Norm_Max": l2_norms.max().item(),
"Hidden_Mean": layer_cpu.mean().item(),
"Hidden_Std": layer_cpu.std().item()
})
df = pd.DataFrame(layer_stats)
# Create evolution plots
fig = make_subplots(
rows=2, cols=2,
subplot_titles=('L2 Norm Evolution', 'Hidden State Mean',
'Hidden State Std', 'Layer Comparison'),
vertical_spacing=0.12
)
fig.add_trace(go.Scatter(x=df['Layer'], y=df['L2_Norm_Mean'],
mode='lines+markers', name='L2 Mean'), row=1, col=1)
fig.add_trace(go.Scatter(x=df['Layer'], y=df['Hidden_Mean'],
mode='lines+markers', name='Hidden Mean'), row=1, col=2)
fig.add_trace(go.Scatter(x=df['Layer'], y=df['Hidden_Std'],
mode='lines+markers', name='Hidden Std'), row=2, col=1)
fig.add_trace(go.Bar(x=df['Layer'], y=df['L2_Norm_Max'],
name='L2 Max'), row=2, col=2)
fig.update_layout(height=600, showlegend=False, title="Neural Representation Evolution")
# Create table
table_html = df.round(4).to_html(index=False, classes='table table-striped')
return fig, f"**πŸ“Š Layer Statistics:**\n{table_html}"
except Exception as e:
return None, f"❌ Error analyzing layer evolution: {str(e)}"
def analyze_weights(layer_num, layer_type):
"""Analyze weight distribution with research-based metrics"""
global model
if model is None:
return None, "❌ Please load the model first."
try:
selected_layer = f"model.layers.{layer_num}.{layer_type}"
# Get weights directly
layer_dict = dict(model.named_modules())
if selected_layer not in layer_dict:
return None, f"❌ Layer '{selected_layer}' not found"
layer_obj = layer_dict[selected_layer]
if not hasattr(layer_obj, 'weight'):
return None, f"❌ Layer has no weights"
weights = layer_obj.weight.data.cpu()
if weights.dtype == torch.bfloat16:
weights = weights.float()
weights = weights.numpy()
# Research-based analysis
l1_norm = np.sum(np.abs(weights))
l2_norm = np.sqrt(np.sum(weights**2))
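# Magnitudes below 1e-8 are counted as effectively dead (never-updated or pruned) weights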
zero_weights = np.sum(np.abs(weights) < 1e-8)
dead_ratio = zero_weights / weights.size * 100
weight_range = np.max(weights) - np.min(weights)
# Sparsity analysis with LLM-appropriate thresholds
sparse_001 = np.mean(np.abs(weights) < 0.001) * 100 # Tiny weights
sparse_01 = np.mean(np.abs(weights) < 0.01) * 100 # Very small weights
sparse_1 = np.mean(np.abs(weights) < 0.1) * 100 # Small weights
# Percentiles
p25, p50, p75, p95 = np.percentile(np.abs(weights), [25, 50, 75, 95])
# Smart visualization for different layer sizes
if weights.size < 500000:  # Small layers - full histogram
fig = px.histogram(x=weights.flatten(), nbins=50,
title=f"Weight Distribution - {selected_layer}",
labels={'x': 'Weight Value', 'y': 'Frequency'},
color_discrete_sequence=['#2E86AB'])
fig.add_vline(x=np.mean(weights), line_dash="dash", line_color="red",
annotation_text=f"Mean: {np.mean(weights):.6f}")
elif weights.size < 2000000:  # Medium layers - sampled histogram
# Sample 100k weights to keep the plot responsive
sample_size = min(100000, weights.size)
sampled_weights = np.random.choice(weights.flatten(), sample_size, replace=False)
fig = px.histogram(x=sampled_weights, nbins=50,
title=f"Weight Distribution - {selected_layer} (Sampled: {sample_size:,}/{weights.size:,})",
labels={'x': 'Weight Value', 'y': 'Frequency'},
color_discrete_sequence=['#2E86AB'])
fig.add_vline(x=np.mean(weights), line_dash="dash", line_color="red",
annotation_text=f"Mean: {np.mean(weights):.6f}")
else: # Large layers - statistical summary plot
# Create a multi-panel statistical visualization
fig = make_subplots(
rows=2, cols=2,
subplot_titles=(
'Weight Statistics Summary',
'Sparsity Analysis',
'Distribution Percentiles',
'Health Indicators'
),
specs=[[{"type": "bar"}, {"type": "bar"}],
[{"type": "bar"}, {"type": "indicator"}]]
)
# Panel 1: Basic statistics
fig.add_trace(go.Bar(
x=['Mean', 'Std', 'Min', 'Max'],
y=[np.mean(weights), np.std(weights), np.min(weights), np.max(weights)],
name='Statistics',
marker_color='#2E86AB'
), row=1, col=1)
# Panel 2: Sparsity levels (Updated for 8B LLM standards)
fig.add_trace(go.Bar(
x=['<0.001', '<0.01', '<0.1'],
y=[sparse_001, sparse_01, sparse_1],
name='Sparsity %',
marker_color=[
'#28a745' if sparse_001 < 25 else '#ffc107' if sparse_001 < 40 else '#ff8c00' if sparse_001 < 55 else '#dc3545',
'#28a745' if sparse_01 < 50 else '#ffc107' if sparse_01 < 65 else '#ff8c00' if sparse_01 < 80 else '#dc3545',
'#28a745' if sparse_1 < 75 else '#ffc107' if sparse_1 < 85 else '#ff8c00' if sparse_1 < 92 else '#dc3545'
]
), row=1, col=2)
# Panel 3: Percentiles
fig.add_trace(go.Bar(
x=['25th', '50th', '75th', '95th'],
y=[p25, p50, p75, p95],
name='Percentiles',
marker_color='#17a2b8'
), row=2, col=1)
# Panel 4: Health score gauge (same thresholds as the report below)
health_score = 100
if dead_ratio > 15: health_score -= 30
elif dead_ratio > 5: health_score -= 15
if sparse_001 > 55: health_score -= 25
elif sparse_001 > 40: health_score -= 15
elif sparse_001 > 25: health_score -= 5
if weight_range < 0.001: health_score -= 20
elif weight_range > 10: health_score -= 20
fig.add_trace(go.Indicator(
mode = "gauge+number",
value = health_score,
title = {'text': "Health Score"},
gauge = {
'axis': {'range': [None, 100]},
'bar': {'color': '#2E86AB'},
'steps': [
{'range': [0, 60], 'color': "lightgray"},
{'range': [60, 80], 'color': "gray"}],
'threshold': {
'line': {'color': "red", 'width': 4},
'thickness': 0.75,
'value': 90}}
), row=2, col=2)
fig.update_layout(height=600, showlegend=False,
title=f"Statistical Analysis - {selected_layer} ({weights.size:,} parameters)")
# Only the histogram branches still need a layout; the summary panel above set height=600
if weights.size < 2000000:
    fig.update_layout(height=500, showlegend=False)
# Health assessment (updated for 8B LLM standards)
health_score = 100
# Dead weights - very strict since truly dead weights are bad
if dead_ratio > 15: health_score -= 30
elif dead_ratio > 5: health_score -= 15
# Tiny weights (<0.001) - updated thresholds based on LLM research
if sparse_001 > 55: health_score -= 25 # >55% is concerning
elif sparse_001 > 40: health_score -= 15 # >40% needs attention
elif sparse_001 > 25: health_score -= 5 # >25% is acceptable
# Weight range - extreme ranges indicate problems
if weight_range < 0.001: health_score -= 20 # Too compressed
elif weight_range > 10: health_score -= 20 # Too wide
health_color = "🟒" if health_score >= 80 else "🟑" if health_score >= 60 else "πŸ”΄"
health_status = "Excellent" if health_score >= 90 else "Good" if health_score >= 80 else "Fair" if health_score >= 60 else "Poor"
# Format results
results = f"""
## βš–οΈ Weight Analysis: {selected_layer}
### πŸ“Š Core Statistics
- **Shape:** {weights.shape}
- **Parameters:** {weights.size:,}
- **Mean:** {np.mean(weights):+.6f}
- **Std:** {np.std(weights):.6f}
### πŸ”¬ Weight Health Analysis
- **L1 Norm:** {l1_norm:.3f} (Manhattan distance - sparsity indicator)
- **L2 Norm:** {l2_norm:.3f} (Euclidean distance - magnitude measure)
- **Dead Weights:** {dead_ratio:.1f}% (weights β‰ˆ 0)
- **Range:** {weight_range:.6f} (Max - Min weight values)
### πŸ•ΈοΈ Sparsity Analysis (8B LLM Research-Based Thresholds)
- **Tiny (<0.001):** {sparse_001:.1f}% {'🟒 Excellent' if sparse_001 < 25 else '🟑 Good' if sparse_001 < 40 else '⚠️ Watch' if sparse_001 < 55 else 'πŸ”΄ Concerning'}
- **Very Small (<0.01):** {sparse_01:.1f}% {'🟒 Excellent' if sparse_01 < 50 else '🟑 Good' if sparse_01 < 65 else '⚠️ Acceptable' if sparse_01 < 80 else 'πŸ”΄ High'}
- **Small (<0.1):** {sparse_1:.1f}% {'🟒 Excellent' if sparse_1 < 75 else '🟑 Good' if sparse_1 < 85 else '⚠️ Normal' if sparse_1 < 92 else 'πŸ”΄ Very High'}
### πŸ“ˆ Distribution Characteristics
- **25th Percentile:** {p25:.6f}
- **Median:** {p50:.6f}
- **75th Percentile:** {p75:.6f}
- **95th Percentile:** {p95:.6f}
### πŸ₯ Layer Health Assessment: {health_color} {health_status} ({health_score}/100)
**Key Insights (8B LLM Standards):**
- **Weight Activity:** {100-dead_ratio:.1f}% of weights are active (target: >95%)
- **Sparsity Pattern:** {sparse_1:.1f}% small weights (8B LLMs: 70-85% is normal)
- **Distribution Health:** L2/L1 ratio = {l2_norm/l1_norm:.3f} (balanced β‰ˆ 0.1-1.0)
- **Learning Capacity:** Weight range suggests {'good' if 0.01 < weight_range < 5 else 'limited'} learning capacity
πŸ’‘ **Research Note:** High sparsity (70-90%) is **normal** for large transformers and indicates efficient learned representations, not poor health.
"""
return fig, results
except Exception as e:
return None, f"❌ Error analyzing weights: {str(e)}"
# Create Gradio interface with custom CSS
def create_interface():
# Custom CSS for dark Swiss theme
custom_css = """
/* Dark Swiss-inspired styling */
.gradio-container {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
font-family: 'Helvetica Neue', 'Arial', sans-serif;
color: #f8f9fa;
}
.main-header {
background: linear-gradient(135deg, #dc3545 0%, #8B0000 100%);
padding: 30px;
border-radius: 15px;
margin: 20px 0;
box-shadow: 0 8px 32px rgba(220, 53, 69, 0.4);
border: 1px solid rgba(220, 53, 69, 0.3);
}
.feature-box {
background: rgba(25, 25, 46, 0.95);
padding: 25px;
border-radius: 12px;
margin: 15px 0;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
border-left: 4px solid #dc3545;
border: 1px solid rgba(255, 255, 255, 0.1);
}
.auth-section {
background: rgba(25, 25, 46, 0.9);
padding: 20px;
border-radius: 10px;
border: 2px solid #dc3545;
margin: 20px 0;
box-shadow: 0 4px 15px rgba(220, 53, 69, 0.2);
}
.footer-section {
background: linear-gradient(135deg, #0d1421 0%, #1a1a2e 100%);
padding: 30px;
border-radius: 15px;
margin-top: 40px;
color: #f8f9fa;
text-align: center;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
border: 1px solid rgba(255, 255, 255, 0.1);
}
/* Tab styling */
.tab-nav {
background: rgba(25, 25, 46, 0.95);
border-radius: 10px;
padding: 5px;
margin: 20px 0;
border: 1px solid rgba(255, 255, 255, 0.1);
}
/* Button improvements */
.gr-button {
background: linear-gradient(135deg, #dc3545 0%, #8B0000 100%);
border: none;
padding: 12px 24px;
font-weight: 600;
border-radius: 8px;
transition: all 0.3s ease;
color: white;
box-shadow: 0 2px 8px rgba(220, 53, 69, 0.3);
}
.gr-button:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(220, 53, 69, 0.6);
background: linear-gradient(135deg, #e74c3c 0%, #c0392b 100%);
}
/* Input field styling */
.gr-textbox, .gr-dropdown {
background: rgba(25, 25, 46, 0.8);
border-radius: 8px;
border: 2px solid rgba(255, 255, 255, 0.2);
transition: border-color 0.3s ease;
color: #f8f9fa;
}
.gr-textbox:focus, .gr-dropdown:focus {
border-color: #dc3545;
box-shadow: 0 0 0 3px rgba(220, 53, 69, 0.2);
background: rgba(25, 25, 46, 0.9);
}
/* Tab content styling */
.gr-tab-item {
background: rgba(25, 25, 46, 0.5);
border-radius: 10px;
padding: 20px;
margin: 10px 0;
}
/* Text color improvements */
.gr-markdown, .gr-html, .gr-textbox label {
color: #f8f9fa;
}
/* Plot background */
.gr-plot {
background: rgba(25, 25, 46, 0.8);
border-radius: 8px;
border: 1px solid rgba(255, 255, 255, 0.1);
}
"""
with gr.Blocks(
title="πŸ‡¨πŸ‡­ Apertus Swiss AI Transparency Dashboard",
theme=gr.themes.Default(
primary_hue="red",
secondary_hue="gray",
neutral_hue="gray",
font=gr.themes.GoogleFont("Inter")
),
css=custom_css
) as demo:
# Main Header
gr.HTML("""
<div class="main-header">
<div style="text-align: center; max-width: 1200px; margin: 0 auto;">
<h1 style="color: white; font-size: 3em; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
🇨🇭 Apertus Swiss AI Transparency Dashboard
</h1>
<h2 style="color: white; margin: 10px 0; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">
The World's Most Transparent Language Model
</h2>
<p style="color: white; font-size: 1.2em; margin: 15px 0; text-shadow: 1px 1px 2px rgba(0,0,0,0.3);">
<strong>Explore the internal workings of Switzerland's open-source 8B parameter AI model</strong>
</p>
</div>
</div>
""")
# Feature Overview
gr.HTML("""
<div class="feature-box">
<h3 style="color: #ff6b6b; margin-bottom: 20px; font-size: 1.5em;">🎯 What makes Apertus special?</h3>
<p style="font-size: 1.1em; margin-bottom: 15px; color: #f8f9fa; font-weight: 500;">
Unlike ChatGPT or Claude, you can see <strong>EVERYTHING</strong> happening inside the AI model:
</p>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; margin: 20px 0;">
<div style="background: rgba(13, 20, 33, 0.8); padding: 20px; border-radius: 10px; border-left: 4px solid #4dabf7; box-shadow: 0 4px 12px rgba(77, 171, 247, 0.2); border: 1px solid rgba(77, 171, 247, 0.3);">
<strong style="color: #74c0fc; font-size: 1.1em;">🧠 Attention Patterns</strong><br>
<span style="color: #ced4da; line-height: 1.4;">Which words the AI focuses on (like eye-tracking during reading)</span>
</div>
<div style="background: rgba(13, 20, 33, 0.8); padding: 20px; border-radius: 10px; border-left: 4px solid #51cf66; box-shadow: 0 4px 12px rgba(81, 207, 102, 0.2); border: 1px solid rgba(81, 207, 102, 0.3);">
<strong style="color: #8ce99a; font-size: 1.1em;">βš–οΈ Neural Weights</strong><br>
<span style="color: #ced4da; line-height: 1.4;">The "brain connections" that control decisions</span>
</div>
<div style="background: rgba(13, 20, 33, 0.8); padding: 20px; border-radius: 10px; border-left: 4px solid #ffd43b; box-shadow: 0 4px 12px rgba(255, 212, 59, 0.2); border: 1px solid rgba(255, 212, 59, 0.3);">
<strong style="color: #ffec99; font-size: 1.1em;">🎲 Prediction Probabilities</strong><br>
<span style="color: #ced4da; line-height: 1.4;">How confident the AI is about each word choice</span>
</div>
<div style="background: rgba(13, 20, 33, 0.8); padding: 20px; border-radius: 10px; border-left: 4px solid #22b8cf; box-shadow: 0 4px 12px rgba(34, 184, 207, 0.2); border: 1px solid rgba(34, 184, 207, 0.3);">
<strong style="color: #66d9ef; font-size: 1.1em;">πŸ” Thinking Process</strong><br>
<span style="color: #ced4da; line-height: 1.4;">Step-by-step how responses are generated</span>
</div>
</div>
<p style="text-align: center; font-size: 1.3em; margin-top: 25px; color: #ff6b6b; font-weight: 600;">
<strong>This is complete AI transparency - no black boxes! πŸ‡¨πŸ‡­</strong>
</p>
</div>
""")
# Authentication Section
gr.HTML("""
<div class="auth-section">
<h3 style="color: #ff6b6b; margin-bottom: 15px; text-align: center; font-size: 1.4em;">πŸ” Model Authentication</h3>
<p style="text-align: center; color: #f8f9fa; margin-bottom: 20px; font-size: 1.1em; font-weight: 500;">
Enter your HuggingFace token to access the Apertus-8B-Instruct-2509 model
</p>
</div>
""")
with gr.Row():
with gr.Column(scale=2):
hf_token = gr.Textbox(
label="πŸ—οΈ HuggingFace Token",
placeholder="hf_...",
type="password",
info="Required to access swiss-ai/Apertus-8B-Instruct-2509. Get your token from: https://huggingface.co/settings/tokens",
container=True
)
with gr.Column(scale=1):
load_btn = gr.Button(
"πŸ‡¨πŸ‡­ Load Apertus Model",
variant="primary",
size="lg",
elem_classes="auth-button"
)
with gr.Row():
model_status = gr.Textbox(
label="πŸ“Š Model Status",
interactive=False,
container=True
)
load_btn.click(load_model, inputs=[hf_token], outputs=[model_status])
# Main Interface Tabs
with gr.Tabs():
# Chat Tab
with gr.TabItem("πŸ’¬ Chat with Apertus"):
with gr.Row():
with gr.Column(scale=2):
chat_input = gr.Textbox(
label="Your message (any language)",
placeholder="ErklΓ€re mir Transparenz in der KI...\nExplique-moi la transparence en IA...\nSpiegami la trasparenza nell'IA...",
lines=3
)
max_tokens = gr.Slider(50, 500, value=300, label="Max Tokens")
chat_btn = gr.Button("πŸ‡¨πŸ‡­ Chat", variant="primary")
with gr.Column(scale=3):
chat_output = gr.Markdown(label="Apertus Response")
chat_btn.click(chat_with_apertus, inputs=[chat_input, max_tokens], outputs=[chat_output])
# Attention Analysis Tab
with gr.TabItem("πŸ‘οΈ Attention Patterns"):
gr.HTML("<p><strong>πŸ” What you'll see:</strong> Heatmap showing which words the AI 'looks at' while thinking - like tracking eye movements during reading</p>")
with gr.Row():
with gr.Column(scale=1):
attention_text = gr.Textbox(
label="Text to analyze",
value="Die Schweiz ist",
info="Enter text to see internal model processing"
)
attention_layer = gr.Slider(0, 31, value=15, step=1, label="Attention Layer")
attention_btn = gr.Button("πŸ‘οΈ Analyze Attention", variant="secondary")
with gr.Column(scale=2):
attention_plot = gr.Plot(label="Attention Heatmap")
attention_insights = gr.Markdown(label="Attention Insights")
attention_btn.click(
analyze_attention,
inputs=[attention_text, attention_layer],
outputs=[attention_plot, attention_insights]
)
# Token Predictions Tab
with gr.TabItem("🎲 Token Predictions"):
gr.HTML("<p><strong>πŸ” What you'll see:</strong> Top-10 most likely next words with confidence levels - see the AI's 'thought process' for each word</p>")
with gr.Row():
with gr.Column(scale=1):
prediction_text = gr.Textbox(
label="Text to analyze",
value="Die wichtigste Eigenschaft von Apertus ist",
info="Enter partial text to see next word predictions"
)
prediction_btn = gr.Button("🎲 Analyze Predictions", variant="secondary")
with gr.Column(scale=2):
prediction_plot = gr.Plot(label="Prediction Probabilities")
prediction_insights = gr.Markdown(label="Prediction Details")
prediction_btn.click(
analyze_token_predictions,
inputs=[prediction_text],
outputs=[prediction_plot, prediction_insights]
)
# Layer Evolution Tab
with gr.TabItem("🧠 Layer Evolution"):
gr.HTML("<p><strong>πŸ” What you'll see:</strong> How the AI's 'understanding' develops through 32 neural layers - from basic recognition to deep comprehension</p>")
with gr.Row():
with gr.Column(scale=1):
evolution_text = gr.Textbox(
label="Text to analyze",
value="Schweizer KI-Innovation revolutioniert Transparenz.",
info="Enter text to see layer evolution"
)
evolution_btn = gr.Button("🧠 Analyze Evolution", variant="secondary")
with gr.Column(scale=2):
evolution_plot = gr.Plot(label="Layer Evolution")
evolution_stats = gr.HTML(label="Layer Statistics")
evolution_btn.click(
analyze_layer_evolution,
inputs=[evolution_text],
outputs=[evolution_plot, evolution_stats]
)
# Weight Analysis Tab
with gr.TabItem("βš–οΈ Weight Analysis"):
gr.HTML("<p><strong>πŸ” What you'll see:</strong> The actual 'brain connections' (neural weights) that control AI decisions - the learned parameters</p>")
gr.HTML("<p><em>Real-time analysis of neural network weights following research best practices</em></p>")
with gr.Row():
with gr.Column(scale=1):
weight_layer_num = gr.Dropdown(
choices=list(range(32)),
value=15,
label="Layer Number"
)
weight_layer_type = gr.Dropdown(
choices=["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj", "mlp.up_proj", "mlp.down_proj"],
value="self_attn.q_proj",
label="Layer Component"
)
weight_btn = gr.Button("βš–οΈ Analyze Weights", variant="secondary")
with gr.Column(scale=2):
weight_plot = gr.Plot(label="Weight Distribution")
weight_analysis = gr.Markdown(label="Weight Analysis")
# Gradio keeps component state between interactions, so the output panels persist
weight_btn.click(
analyze_weights,
inputs=[weight_layer_num, weight_layer_type],
outputs=[weight_plot, weight_analysis]
)
# Footer
gr.HTML("""
<div class="footer-section">
<h2 style="color: white; margin-bottom: 20px; font-size: 2.2em;">πŸ‡¨πŸ‡­ Apertus Swiss AI</h2>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 30px; margin: 30px 0;">
<div>
<h4 style="color: #f8f9fa; margin-bottom: 10px;">πŸ”οΈ Swiss Excellence</h4>
<p style="color: #bdc3c7; line-height: 1.6;">
Built with Swiss precision engineering principles - reliable, transparent, and innovative.
</p>
</div>
<div>
<h4 style="color: #f8f9fa; margin-bottom: 10px;">πŸ”¬ Research Grade</h4>
<p style="color: #bdc3c7; line-height: 1.6;">
Complete model transparency with research-based metrics and analysis tools.
</p>
</div>
<div>
<h4 style="color: #f8f9fa; margin-bottom: 10px;">🌍 Multilingual</h4>
<p style="color: #bdc3c7; line-height: 1.6;">
Supports German, French, Italian, English, Romansh and Swiss dialects.
</p>
</div>
<div>
<h4 style="color: #f8f9fa; margin-bottom: 10px;">πŸŽ“ Educational</h4>
<p style="color: #bdc3c7; line-height: 1.6;">
Perfect for students, researchers, and anyone curious about AI internals.
</p>
</div>
</div>
<div style="border-top: 1px solid #546e7a; padding-top: 20px; margin-top: 30px;">
<p style="color: #ecf0f1; font-size: 1.3em; margin: 0;">
<strong>Experience true AI transparency - Swiss precision meets artificial intelligence</strong>
</p>
<p style="color: #95a5a6; margin: 10px 0 0 0;">
Powered by Apertus-8B-Instruct-2509 • 8B Parameters • Complete Transparency
</p>
</div>
</div>
""")
return demo
# Launch the app
if __name__ == "__main__":
demo = create_interface()
# HuggingFace Spaces serves Gradio apps on port 7860 (8501 is Streamlit's default)
demo.launch(server_port=7860, server_name="0.0.0.0")