Markus Clauss DIRU Vetsuisse Claude committed
Commit e055772 · 1 Parent(s): 33f9c9e

Fix model loading for all analysis functions in ZeroGPU


- Add ensure_model_loaded() helper function for consistent model loading
- Update all analysis functions to use the helper
- Ensures model is available in each GPU-decorated function
- Fixes "Please load the model first" errors in analysis tabs
- Each function now loads the model from cache if needed (see the sketch below)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
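In short: on ZeroGPU Spaces, each `@spaces.GPU`-decorated call may run in a worker where `load_model()` never executed, so every analysis function now guards itself. A minimal runnable sketch of the pattern (the `_load` stub below is a placeholder for the real `AutoTokenizer`/`AutoModelForCausalLM` loading shown in the diff):

```python
from typing import Optional, Tuple

model: Optional[object] = None
tokenizer: Optional[object] = None

def _load() -> Tuple[object, object]:
    # Placeholder for the real from_pretrained() calls in app.py.
    return object(), object()

def ensure_model_loaded() -> Tuple[bool, str]:
    """Populate the module-level globals on first use; a cheap no-op afterwards."""
    global model, tokenizer
    if model is None or tokenizer is None:
        try:
            model, tokenizer = _load()
        except Exception as e:
            return False, f"❌ Failed to load model: {e}"
    return True, "✅ Model ready"

def analyze_attention(text: str):
    # Every GPU-decorated entry point calls the guard first instead of
    # returning "Please load the model first."
    ok, msg = ensure_model_loaded()
    if not ok:
        return None, msg
    return f"attention analysis of {text!r}", msg
```

The guard returns a (success, message) tuple so each caller can surface a load failure directly in the UI instead of raising.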

Files changed (1)
  1. app.py +69 -45
app.py CHANGED
@@ -54,6 +54,36 @@ model_loaded = False
 HF_TOKEN = os.environ.get('HF_TOKEN', None)
 print(f"🔍 HF_TOKEN available: {bool(HF_TOKEN)}")
 
+def ensure_model_loaded():
+    """Helper function to ensure model is loaded for ZeroGPU"""
+    global model, tokenizer
+
+    if model is None or tokenizer is None:
+        hf_token = HF_TOKEN
+        if not hf_token:
+            return False, "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
+
+        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                token=hf_token,
+                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+                device_map="auto" if torch.cuda.is_available() else "cpu",
+                low_cpu_mem_usage=True,
+                output_attentions=True,
+                output_hidden_states=True,
+                trust_remote_code=True
+            )
+            return True, "✅ Model loaded"
+        except Exception as e:
+            return False, f"❌ Failed to load model: {str(e)}"
+    return True, "✅ Model ready"
+
 @spaces.GPU(duration=120, enable_queue=True)
 def load_model():
     """Load Apertus model with HuggingFace token from environment"""
@@ -149,30 +179,10 @@ def chat_with_apertus(message, max_tokens=300):
     """Simple chat function"""
     global model, tokenizer
 
-    # ZeroGPU: Need to load model in each GPU function
-    if model is None or tokenizer is None:
-        hf_token = HF_TOKEN
-        if not hf_token:
-            return "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
-
-        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                token=hf_token,
-                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto" if torch.cuda.is_available() else "cpu",
-                low_cpu_mem_usage=True,
-                output_attentions=True,
-                output_hidden_states=True,
-                trust_remote_code=True
-            )
-        except Exception as e:
-            return f"❌ Failed to load model: {str(e)}"
+    # Ensure model is loaded
+    success, msg = ensure_model_loaded()
+    if not success:
+        return msg
 
     try:
         formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -215,9 +225,11 @@ You are Apertus, a helpful Swiss AI assistant. You are transparent, multilingual
 def analyze_attention(text, layer=15):
     """Analyze attention patterns"""
     global model, tokenizer
-
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first."
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg
 
     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -270,9 +282,11 @@ def analyze_attention(text, layer=15):
 def analyze_token_predictions(text):
     """Analyze next token predictions"""
     global model, tokenizer
-
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first."
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg
 
     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -324,9 +338,11 @@ def analyze_token_predictions(text):
 def analyze_layer_evolution(text):
     """Analyze how representations evolve through layers"""
     global model, tokenizer
-
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first."
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg
 
     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -393,9 +409,11 @@ def analyze_layer_evolution(text):
 def analyze_weights(layer_num, layer_type):
     """Analyze weight distribution with research-based metrics"""
     global model
-
-    if model is None:
-        return None, "❌ Please load the model first."
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg
 
     try:
         selected_layer = f"model.layers.{layer_num}.{layer_type}"
@@ -842,9 +860,11 @@ def goldfish_loss_function(logits, targets, k=0.1, temperature=1.0):
 def analyze_memorization_patterns(text, k_values=[0.0, 0.1, 0.2, 0.3]):
     """Analyze how Goldfish Loss affects memorization"""
     global model, tokenizer
-
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first."
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg
 
     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
@@ -1147,9 +1167,11 @@ def simulate_optimizer_comparison(baseline_loss, num_steps):
 def analyze_decision_process(text, max_steps=10):
     """Step-by-step decision process like CLI script"""
     global model, tokenizer
-
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first."
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg
 
     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)
@@ -1281,9 +1303,11 @@ def analyze_decision_process(text, max_steps=10):
 def analyze_german_compounds(text_input=""):
     """Analyze German compound words with multi-tokenizer comparison"""
     global model, tokenizer
-
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first."
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg
 
     # Swiss/German compound examples if no input
     if not text_input.strip():