Markus Clauss DIRU Vetsuisse and Claude committed
Commit · e055772
Parent(s): 33f9c9e
Fix model loading for all analysis functions in ZeroGPU

- Add ensure_model_loaded() helper function for consistent model loading
- Update all analysis functions to use the helper
- Ensures the model is available in each GPU-decorated function
- Fixes "Please load the model first" errors in the analysis tabs
- Each function now loads the model from cache if needed

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
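The fix follows the usual ZeroGPU caveat: state set up inside one @spaces.GPU call is not guaranteed to survive into the next, because GPU tasks can run in separate short-lived worker processes, so a model loaded by load_model() may simply be absent when an analysis function runs. Below is a minimal, self-contained sketch of the guard pattern the diff applies; FakeModel and analyze_example are hypothetical stand-ins for illustration, not code from app.py.

```python
import spaces  # HuggingFace ZeroGPU decorator, as used in app.py

model = None  # module-level cache; may be empty in a fresh ZeroGPU worker

class FakeModel:
    """Hypothetical stand-in for the real AutoModelForCausalLM instance."""
    def shout(self, text: str) -> str:
        return text.upper()

def ensure_model_loaded():
    """Load the model on first use; return (success, message)."""
    global model
    if model is None:
        try:
            # The real app loads swiss-ai/Apertus-8B-Instruct-2509 here.
            model = FakeModel()
        except Exception as e:
            return False, f"❌ Failed to load model: {e}"
        return True, "✅ Model loaded"
    return True, "✅ Model ready"

@spaces.GPU(duration=120)
def analyze_example(text):
    # The guard runs inside every GPU-decorated entry point, so the
    # function still works when globals set by an earlier call are gone.
    success, msg = ensure_model_loaded()
    if not success:
        return None, msg
    return model.shout(text), msg
```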
app.py
CHANGED
@@ -54,6 +54,36 @@ model_loaded = False
 HF_TOKEN = os.environ.get('HF_TOKEN', None)
 print(f"🔑 HF_TOKEN available: {bool(HF_TOKEN)}")

+def ensure_model_loaded():
+    """Helper function to ensure model is loaded for ZeroGPU"""
+    global model, tokenizer
+
+    if model is None or tokenizer is None:
+        hf_token = HF_TOKEN
+        if not hf_token:
+            return False, "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
+
+        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                token=hf_token,
+                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+                device_map="auto" if torch.cuda.is_available() else "cpu",
+                low_cpu_mem_usage=True,
+                output_attentions=True,
+                output_hidden_states=True,
+                trust_remote_code=True
+            )
+            return True, "✅ Model loaded"
+        except Exception as e:
+            return False, f"❌ Failed to load model: {str(e)}"
+    return True, "✅ Model ready"
+
 @spaces.GPU(duration=120, enable_queue=True)
 def load_model():
     """Load Apertus model with HuggingFace token from environment"""
@@ -149,30 +179,10 @@ def chat_with_apertus(message, max_tokens=300):
     """Simple chat function"""
     global model, tokenizer

-    #
-    if model is None or tokenizer is None:
-        hf_token = HF_TOKEN
-        if not hf_token:
-            return "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
-
-        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                token=hf_token,
-                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto" if torch.cuda.is_available() else "cpu",
-                low_cpu_mem_usage=True,
-                output_attentions=True,
-                output_hidden_states=True,
-                trust_remote_code=True
-            )
-        except Exception as e:
-            return f"❌ Failed to load model: {str(e)}"
+    # Ensure model is loaded
+    success, msg = ensure_model_loaded()
+    if not success:
+        return msg

     try:
         formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -215,9 +225,11 @@ You are Apertus, a helpful Swiss AI assistant. You are transparent, multilingual
 def analyze_attention(text, layer=15):
     """Analyze attention patterns"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -270,9 +282,11 @@ def analyze_attention(text, layer=15):
 def analyze_token_predictions(text):
     """Analyze next token predictions"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -324,9 +338,11 @@ def analyze_token_predictions(text):
 def analyze_layer_evolution(text):
     """Analyze how representations evolve through layers"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -393,9 +409,11 @@ def analyze_layer_evolution(text):
 def analyze_weights(layer_num, layer_type):
     """Analyze weight distribution with research-based metrics"""
     global model
-    if model is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         selected_layer = f"model.layers.{layer_num}.{layer_type}"
@@ -842,9 +860,11 @@ def goldfish_loss_function(logits, targets, k=0.1, temperature=1.0):
 def analyze_memorization_patterns(text, k_values=[0.0, 0.1, 0.2, 0.3]):
     """Analyze how Goldfish Loss affects memorization"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
@@ -1147,9 +1167,11 @@ def simulate_optimizer_comparison(baseline_loss, num_steps):
 def analyze_decision_process(text, max_steps=10):
     """Step-by-step decision process like CLI script"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)
@@ -1281,9 +1303,11 @@ def analyze_decision_process(text, max_steps=10):
 def analyze_german_compounds(text_input=""):
     """Analyze German compound words with multi-tokenizer comparison"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     # Swiss/German compound examples if no input
     if not text_input.strip():
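With the guard in place, calling an analysis tab in a fresh worker degrades to a readable message instead of the old "Please load the model first" error. A hypothetical smoke test for the failure path, assuming HF_TOKEN is unset (per the diff, the analysis functions return (None, message) when ensure_model_loaded() fails):

```python
# Hypothetical call; the input text and layer value are arbitrary.
result, msg = analyze_attention("Grüezi mitenand", layer=15)
if result is None:
    print(msg)  # "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
```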