Markus Clauss DIRU Vetsuisse and Claude committed
Commit · e055772
Parent(s): 33f9c9e
Fix model loading for all analysis functions in ZeroGPU

- Add ensure_model_loaded() helper function for consistent model loading
- Update all analysis functions to use the helper
- Ensures the model is available in each GPU-decorated function
- Fixes "Please load the model first" errors in the analysis tabs
- Each function now loads the model from cache if needed

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
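The fix follows the usual ZeroGPU caveat: state set up inside one @spaces.GPU call is not guaranteed to survive into the next, because GPU tasks can run in separate short-lived worker processes, so a model loaded by load_model() may simply be absent when an analysis function runs. Below is a minimal, self-contained sketch of the guard pattern the diff applies; FakeModel and analyze_example are hypothetical stand-ins for illustration, not code from app.py.

```python
import spaces  # HuggingFace ZeroGPU decorator, as used in app.py

model = None  # module-level cache; may be empty in a fresh ZeroGPU worker

class FakeModel:
    """Hypothetical stand-in for the real AutoModelForCausalLM instance."""
    def shout(self, text: str) -> str:
        return text.upper()

def ensure_model_loaded():
    """Load the model on first use; return (success, message)."""
    global model
    if model is None:
        try:
            # The real app loads swiss-ai/Apertus-8B-Instruct-2509 here.
            model = FakeModel()
        except Exception as e:
            return False, f"❌ Failed to load model: {e}"
        return True, "✅ Model loaded"
    return True, "✅ Model ready"

@spaces.GPU(duration=120)
def analyze_example(text):
    # The guard runs inside every GPU-decorated entry point, so the
    # function still works when globals set by an earlier call are gone.
    success, msg = ensure_model_loaded()
    if not success:
        return None, msg
    return model.shout(text), msg
```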
app.py
CHANGED
@@ -54,6 +54,36 @@ model_loaded = False
 HF_TOKEN = os.environ.get('HF_TOKEN', None)
 print(f"🔑 HF_TOKEN available: {bool(HF_TOKEN)}")

+def ensure_model_loaded():
+    """Helper function to ensure model is loaded for ZeroGPU"""
+    global model, tokenizer
+
+    if model is None or tokenizer is None:
+        hf_token = HF_TOKEN
+        if not hf_token:
+            return False, "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
+
+        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                token=hf_token,
+                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+                device_map="auto" if torch.cuda.is_available() else "cpu",
+                low_cpu_mem_usage=True,
+                output_attentions=True,
+                output_hidden_states=True,
+                trust_remote_code=True
+            )
+            return True, "✅ Model loaded"
+        except Exception as e:
+            return False, f"❌ Failed to load model: {str(e)}"
+    return True, "✅ Model ready"
+
 @spaces.GPU(duration=120, enable_queue=True)
 def load_model():
     """Load Apertus model with HuggingFace token from environment"""
@@ -149,30 +179,10 @@ def chat_with_apertus(message, max_tokens=300):
     """Simple chat function"""
     global model, tokenizer

-    #
-    if model is None or tokenizer is None:
-        hf_token = HF_TOKEN
-        if not hf_token:
-            return "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
-
-        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                token=hf_token,
-                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto" if torch.cuda.is_available() else "cpu",
-                low_cpu_mem_usage=True,
-                output_attentions=True,
-                output_hidden_states=True,
-                trust_remote_code=True
-            )
-        except Exception as e:
-            return f"❌ Failed to load model: {str(e)}"
+    # Ensure model is loaded
+    success, msg = ensure_model_loaded()
+    if not success:
+        return msg

     try:
         formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -215,9 +225,11 @@ You are Apertus, a helpful Swiss AI assistant. You are transparent, multilingual
 def analyze_attention(text, layer=15):
     """Analyze attention patterns"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -270,9 +282,11 @@ def analyze_attention(text, layer=15):
 def analyze_token_predictions(text):
     """Analyze next token predictions"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -324,9 +338,11 @@ def analyze_token_predictions(text):
 def analyze_layer_evolution(text):
     """Analyze how representations evolve through layers"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -393,9 +409,11 @@ def analyze_layer_evolution(text):
 def analyze_weights(layer_num, layer_type):
     """Analyze weight distribution with research-based metrics"""
     global model
-    if model is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         selected_layer = f"model.layers.{layer_num}.{layer_type}"
@@ -842,9 +860,11 @@ def goldfish_loss_function(logits, targets, k=0.1, temperature=1.0):
 def analyze_memorization_patterns(text, k_values=[0.0, 0.1, 0.2, 0.3]):
     """Analyze how Goldfish Loss affects memorization"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
@@ -1147,9 +1167,11 @@ def simulate_optimizer_comparison(baseline_loss, num_steps):
 def analyze_decision_process(text, max_steps=10):
     """Step-by-step decision process like CLI script"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)
@@ -1281,9 +1303,11 @@ def analyze_decision_process(text, max_steps=10):
 def analyze_german_compounds(text_input=""):
     """Analyze German compound words with multi-tokenizer comparison"""
     global model, tokenizer
-    if model is None or tokenizer is None:
-        return None, "❌ Please load the model first!"
-
+
+    # Ensure model is loaded for ZeroGPU
+    success, msg = ensure_model_loaded()
+    if not success:
+        return None, msg

     # Swiss/German compound examples if no input
     if not text_input.strip():
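With the guard in place, calling an analysis tab in a fresh worker degrades to a readable message instead of the old "Please load the model first" error. A hypothetical smoke test for the failure path, assuming HF_TOKEN is unset (per the diff, the analysis functions return (None, message) when ensure_model_loaded() fails):

```python
# Hypothetical call; the input text and layer value are arbitrary.
result, msg = analyze_attention("Grüezi mitenand", layer=15)
if result is None:
    print(msg)  # "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
```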