Spaces:
Runtime error
Runtime error
Markus Clauss DIRU Vetsuisse
Claude
commited on
Commit
Β·
ed1e41a
1
Parent(s):
e055772
Switch to CPU-only version for stable persistent model
Browse files- Remove all @spaces.GPU decorators
- Remove ensure_model_loaded helper (not needed)
- Simplify all functions - model stays persistent in memory
- Model loads once on startup and remains available
- All features work on CPU (just slower than GPU)
- Fixes all "Model not loaded" errors permanently
Benefits:
- Free forever (CPU Basic tier)
- Model persists across all function calls
- No ZeroGPU isolation issues
- Simpler, more stable code
π€ Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <[email protected]>
app.py
CHANGED
|
@@ -14,7 +14,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 14 |
import warnings
|
| 15 |
import os
|
| 16 |
import time # For timing measurements
|
| 17 |
-
import spaces
|
| 18 |
|
| 19 |
# Advanced ML components (2024 State-of-the-Art)
|
| 20 |
try:
|
|
@@ -54,37 +54,6 @@ model_loaded = False
|
|
| 54 |
HF_TOKEN = os.environ.get('HF_TOKEN', None)
|
| 55 |
print(f"π HF_TOKEN available: {bool(HF_TOKEN)}")
|
| 56 |
|
| 57 |
-
def ensure_model_loaded():
|
| 58 |
-
"""Helper function to ensure model is loaded for ZeroGPU"""
|
| 59 |
-
global model, tokenizer
|
| 60 |
-
|
| 61 |
-
if model is None or tokenizer is None:
|
| 62 |
-
hf_token = HF_TOKEN
|
| 63 |
-
if not hf_token:
|
| 64 |
-
return False, "β No HuggingFace token found. Please set HF_TOKEN environment variable."
|
| 65 |
-
|
| 66 |
-
model_name = "swiss-ai/Apertus-8B-Instruct-2509"
|
| 67 |
-
try:
|
| 68 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
|
| 69 |
-
if tokenizer.pad_token is None:
|
| 70 |
-
tokenizer.pad_token = tokenizer.eos_token
|
| 71 |
-
|
| 72 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 73 |
-
model_name,
|
| 74 |
-
token=hf_token,
|
| 75 |
-
torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
|
| 76 |
-
device_map="auto" if torch.cuda.is_available() else "cpu",
|
| 77 |
-
low_cpu_mem_usage=True,
|
| 78 |
-
output_attentions=True,
|
| 79 |
-
output_hidden_states=True,
|
| 80 |
-
trust_remote_code=True
|
| 81 |
-
)
|
| 82 |
-
return True, "β
Model loaded"
|
| 83 |
-
except Exception as e:
|
| 84 |
-
return False, f"β Failed to load model: {str(e)}"
|
| 85 |
-
return True, "β
Model ready"
|
| 86 |
-
|
| 87 |
-
@spaces.GPU(duration=120, enable_queue=True)
|
| 88 |
def load_model():
|
| 89 |
"""Load Apertus model with HuggingFace token from environment"""
|
| 90 |
global model, tokenizer, model_loaded
|
|
@@ -174,15 +143,12 @@ def load_model():
|
|
| 174 |
print(f"π Full traceback:\n{traceback.format_exc()}")
|
| 175 |
return f"β Failed to load model: {str(e)}\nπ‘ Check your token and model access permissions."
|
| 176 |
|
| 177 |
-
@spaces.GPU(duration=60, enable_queue=True)
|
| 178 |
def chat_with_apertus(message, max_tokens=300):
|
| 179 |
"""Simple chat function"""
|
| 180 |
global model, tokenizer
|
| 181 |
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
if not success:
|
| 185 |
-
return msg
|
| 186 |
|
| 187 |
try:
|
| 188 |
formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
@@ -221,15 +187,12 @@ You are Apertus, a helpful Swiss AI assistant. You are transparent, multilingual
|
|
| 221 |
except Exception as e:
|
| 222 |
return f"β Error: {str(e)}"
|
| 223 |
|
| 224 |
-
@spaces.GPU(duration=30)
|
| 225 |
def analyze_attention(text, layer=15):
|
| 226 |
"""Analyze attention patterns"""
|
| 227 |
global model, tokenizer
|
| 228 |
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
if not success:
|
| 232 |
-
return None, msg
|
| 233 |
|
| 234 |
try:
|
| 235 |
inputs = tokenizer(text, return_tensors="pt")
|
|
@@ -278,15 +241,12 @@ def analyze_attention(text, layer=15):
|
|
| 278 |
except Exception as e:
|
| 279 |
return None, f"β Error analyzing attention: {str(e)}"
|
| 280 |
|
| 281 |
-
@spaces.GPU(duration=30)
|
| 282 |
def analyze_token_predictions(text):
|
| 283 |
"""Analyze next token predictions"""
|
| 284 |
global model, tokenizer
|
| 285 |
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
if not success:
|
| 289 |
-
return None, msg
|
| 290 |
|
| 291 |
try:
|
| 292 |
inputs = tokenizer(text, return_tensors="pt")
|
|
@@ -334,15 +294,12 @@ def analyze_token_predictions(text):
|
|
| 334 |
except Exception as e:
|
| 335 |
return None, f"β Error analyzing predictions: {str(e)}"
|
| 336 |
|
| 337 |
-
@spaces.GPU(duration=30)
|
| 338 |
def analyze_layer_evolution(text):
|
| 339 |
"""Analyze how representations evolve through layers"""
|
| 340 |
global model, tokenizer
|
| 341 |
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
if not success:
|
| 345 |
-
return None, msg
|
| 346 |
|
| 347 |
try:
|
| 348 |
inputs = tokenizer(text, return_tensors="pt")
|
|
@@ -405,15 +362,12 @@ def analyze_layer_evolution(text):
|
|
| 405 |
except Exception as e:
|
| 406 |
return None, f"β Error analyzing layer evolution: {str(e)}"
|
| 407 |
|
| 408 |
-
@spaces.GPU(duration=30)
|
| 409 |
def analyze_weights(layer_num, layer_type):
|
| 410 |
"""Analyze weight distribution with research-based metrics"""
|
| 411 |
global model
|
| 412 |
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
if not success:
|
| 416 |
-
return None, msg
|
| 417 |
|
| 418 |
try:
|
| 419 |
selected_layer = f"model.layers.{layer_num}.{layer_type}"
|
|
@@ -856,15 +810,12 @@ def goldfish_loss_function(logits, targets, k=0.1, temperature=1.0):
|
|
| 856 |
else:
|
| 857 |
return masked_loss.sum()
|
| 858 |
|
| 859 |
-
@spaces.GPU(duration=30)
|
| 860 |
def analyze_memorization_patterns(text, k_values=[0.0, 0.1, 0.2, 0.3]):
|
| 861 |
"""Analyze how Goldfish Loss affects memorization"""
|
| 862 |
global model, tokenizer
|
| 863 |
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
if not success:
|
| 867 |
-
return None, msg
|
| 868 |
|
| 869 |
try:
|
| 870 |
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
|
|
@@ -1163,15 +1114,12 @@ def simulate_optimizer_comparison(baseline_loss, num_steps):
|
|
| 1163 |
# π§ DECISION PROCESS & GERMAN LANGUAGE ANALYSIS
|
| 1164 |
# =============================================================================
|
| 1165 |
|
| 1166 |
-
@spaces.GPU(duration=30)
|
| 1167 |
def analyze_decision_process(text, max_steps=10):
|
| 1168 |
"""Step-by-step decision process like CLI script"""
|
| 1169 |
global model, tokenizer
|
| 1170 |
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
if not success:
|
| 1174 |
-
return None, msg
|
| 1175 |
|
| 1176 |
try:
|
| 1177 |
inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)
|
|
@@ -1299,15 +1247,12 @@ def analyze_decision_process(text, max_steps=10):
|
|
| 1299 |
except Exception as e:
|
| 1300 |
return None, f"β Error analyzing decision process: {str(e)}"
|
| 1301 |
|
| 1302 |
-
@spaces.GPU(duration=30)
|
| 1303 |
def analyze_german_compounds(text_input=""):
|
| 1304 |
"""Analyze German compound words with multi-tokenizer comparison"""
|
| 1305 |
global model, tokenizer
|
| 1306 |
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
if not success:
|
| 1310 |
-
return None, msg
|
| 1311 |
|
| 1312 |
# Swiss/German compound examples if no input
|
| 1313 |
if not text_input.strip():
|
|
|
|
| 14 |
import warnings
|
| 15 |
import os
|
| 16 |
import time # For timing measurements
|
| 17 |
+
# import spaces # Disabled - CPU-only version for persistent model
|
| 18 |
|
| 19 |
# Advanced ML components (2024 State-of-the-Art)
|
| 20 |
try:
|
|
|
|
| 54 |
HF_TOKEN = os.environ.get('HF_TOKEN', None)
|
| 55 |
print(f"π HF_TOKEN available: {bool(HF_TOKEN)}")
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def load_model():
|
| 58 |
"""Load Apertus model with HuggingFace token from environment"""
|
| 59 |
global model, tokenizer, model_loaded
|
|
|
|
| 143 |
print(f"π Full traceback:\n{traceback.format_exc()}")
|
| 144 |
return f"β Failed to load model: {str(e)}\nπ‘ Check your token and model access permissions."
|
| 145 |
|
|
|
|
| 146 |
def chat_with_apertus(message, max_tokens=300):
|
| 147 |
"""Simple chat function"""
|
| 148 |
global model, tokenizer
|
| 149 |
|
| 150 |
+
if model is None or tokenizer is None:
|
| 151 |
+
return "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 152 |
|
| 153 |
try:
|
| 154 |
formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
|
|
| 187 |
except Exception as e:
|
| 188 |
return f"β Error: {str(e)}"
|
| 189 |
|
|
|
|
| 190 |
def analyze_attention(text, layer=15):
|
| 191 |
"""Analyze attention patterns"""
|
| 192 |
global model, tokenizer
|
| 193 |
|
| 194 |
+
if model is None or tokenizer is None:
|
| 195 |
+
return None, "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 196 |
|
| 197 |
try:
|
| 198 |
inputs = tokenizer(text, return_tensors="pt")
|
|
|
|
| 241 |
except Exception as e:
|
| 242 |
return None, f"β Error analyzing attention: {str(e)}"
|
| 243 |
|
|
|
|
| 244 |
def analyze_token_predictions(text):
|
| 245 |
"""Analyze next token predictions"""
|
| 246 |
global model, tokenizer
|
| 247 |
|
| 248 |
+
if model is None or tokenizer is None:
|
| 249 |
+
return None, "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 250 |
|
| 251 |
try:
|
| 252 |
inputs = tokenizer(text, return_tensors="pt")
|
|
|
|
| 294 |
except Exception as e:
|
| 295 |
return None, f"β Error analyzing predictions: {str(e)}"
|
| 296 |
|
|
|
|
| 297 |
def analyze_layer_evolution(text):
|
| 298 |
"""Analyze how representations evolve through layers"""
|
| 299 |
global model, tokenizer
|
| 300 |
|
| 301 |
+
if model is None or tokenizer is None:
|
| 302 |
+
return None, "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 303 |
|
| 304 |
try:
|
| 305 |
inputs = tokenizer(text, return_tensors="pt")
|
|
|
|
| 362 |
except Exception as e:
|
| 363 |
return None, f"β Error analyzing layer evolution: {str(e)}"
|
| 364 |
|
|
|
|
| 365 |
def analyze_weights(layer_num, layer_type):
|
| 366 |
"""Analyze weight distribution with research-based metrics"""
|
| 367 |
global model
|
| 368 |
|
| 369 |
+
if model is None:
|
| 370 |
+
return None, "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 371 |
|
| 372 |
try:
|
| 373 |
selected_layer = f"model.layers.{layer_num}.{layer_type}"
|
|
|
|
| 810 |
else:
|
| 811 |
return masked_loss.sum()
|
| 812 |
|
|
|
|
| 813 |
def analyze_memorization_patterns(text, k_values=[0.0, 0.1, 0.2, 0.3]):
|
| 814 |
"""Analyze how Goldfish Loss affects memorization"""
|
| 815 |
global model, tokenizer
|
| 816 |
|
| 817 |
+
if model is None or tokenizer is None:
|
| 818 |
+
return None, "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 819 |
|
| 820 |
try:
|
| 821 |
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
|
|
|
|
| 1114 |
# π§ DECISION PROCESS & GERMAN LANGUAGE ANALYSIS
|
| 1115 |
# =============================================================================
|
| 1116 |
|
|
|
|
| 1117 |
def analyze_decision_process(text, max_steps=10):
|
| 1118 |
"""Step-by-step decision process like CLI script"""
|
| 1119 |
global model, tokenizer
|
| 1120 |
|
| 1121 |
+
if model is None or tokenizer is None:
|
| 1122 |
+
return None, "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 1123 |
|
| 1124 |
try:
|
| 1125 |
inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)
|
|
|
|
| 1247 |
except Exception as e:
|
| 1248 |
return None, f"β Error analyzing decision process: {str(e)}"
|
| 1249 |
|
|
|
|
| 1250 |
def analyze_german_compounds(text_input=""):
|
| 1251 |
"""Analyze German compound words with multi-tokenizer comparison"""
|
| 1252 |
global model, tokenizer
|
| 1253 |
|
| 1254 |
+
if model is None or tokenizer is None:
|
| 1255 |
+
return None, "β Model not loaded. Please wait for initialization or refresh the page."
|
|
|
|
|
|
|
| 1256 |
|
| 1257 |
# Swiss/German compound examples if no input
|
| 1258 |
if not text_input.strip():
|