Markus Clauss DIRU Vetsuisse Claude committed
Commit ed1e41a · 1 Parent(s): e055772

Switch to CPU-only version for stable persistent model


- Remove all @spaces.GPU decorators
- Remove ensure_model_loaded helper (not needed)
- Simplify all functions - model stays persistent in memory
- Model loads once on startup and remains available
- All features work on CPU (just slower than GPU)
- Fixes all "Model not loaded" errors permanently

Benefits:
- Free forever (CPU Basic tier)
- Model persists across all function calls
- No ZeroGPU isolation issues
- Simpler, more stable code

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
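
The pattern this commit adopts is simple: load the model once at startup into module-level globals, and let every Gradio handler read those globals directly. Without `@spaces.GPU`, handlers run in the same process as the startup code, so the globals survive between calls. A minimal sketch of that pattern, simplified from app.py (the real file wires these handlers into a Gradio UI and adds many more analysis functions):

```python
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "swiss-ai/Apertus-8B-Instruct-2509"
model = None
tokenizer = None

def load_model():
    """Load once at startup; the model then stays resident in memory."""
    global model, tokenizer
    token = os.environ.get("HF_TOKEN")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=token)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        token=token,
        torch_dtype=torch.float32,   # CPU-only: stick to float32
        device_map="cpu",
        low_cpu_mem_usage=True,
    )

def chat(message, max_tokens=300):
    # No @spaces.GPU decorator, so this runs in the same process that
    # called load_model() -- the globals are still populated.
    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization or refresh the page."
    inputs = tokenizer(message, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

load_model()  # runs once when the Space starts; never reloaded per call
```

The trade-off is speed: an 8B model on the free CPU Basic tier generates slowly, but nothing is ever unloaded between calls, which is what eliminates the "Model not loaded" failures.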

Files changed (1): app.py (+17, -72)

app.py CHANGED
@@ -14,7 +14,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import warnings
 import os
 import time # For timing measurements
-import spaces
+# import spaces # Disabled - CPU-only version for persistent model
 
 # Advanced ML components (2024 State-of-the-Art)
 try:
@@ -54,37 +54,6 @@ model_loaded = False
 HF_TOKEN = os.environ.get('HF_TOKEN', None)
 print(f"🔐 HF_TOKEN available: {bool(HF_TOKEN)}")
 
-def ensure_model_loaded():
-    """Helper function to ensure model is loaded for ZeroGPU"""
-    global model, tokenizer
-
-    if model is None or tokenizer is None:
-        hf_token = HF_TOKEN
-        if not hf_token:
-            return False, "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
-
-        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-            if tokenizer.pad_token is None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                token=hf_token,
-                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto" if torch.cuda.is_available() else "cpu",
-                low_cpu_mem_usage=True,
-                output_attentions=True,
-                output_hidden_states=True,
-                trust_remote_code=True
-            )
-            return True, "✅ Model loaded"
-        except Exception as e:
-            return False, f"❌ Failed to load model: {str(e)}"
-    return True, "✅ Model ready"
-
-@spaces.GPU(duration=120, enable_queue=True)
 def load_model():
     """Load Apertus model with HuggingFace token from environment"""
     global model, tokenizer, model_loaded
@@ -174,15 +143,12 @@ def load_model():
         print(f"📋 Full traceback:\n{traceback.format_exc()}")
         return f"❌ Failed to load model: {str(e)}\n💡 Check your token and model access permissions."
 
-@spaces.GPU(duration=60, enable_queue=True)
 def chat_with_apertus(message, max_tokens=300):
     """Simple chat function"""
     global model, tokenizer
 
-    # Ensure model is loaded
-    success, msg = ensure_model_loaded()
-    if not success:
-        return msg
+    if model is None or tokenizer is None:
+        return "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     try:
         formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -221,15 +187,12 @@ You are Apertus, a helpful Swiss AI assistant. You are transparent, multilingual
     except Exception as e:
         return f"❌ Error: {str(e)}"
 
-@spaces.GPU(duration=30)
 def analyze_attention(text, layer=15):
     """Analyze attention patterns"""
     global model, tokenizer
 
-    # Ensure model is loaded for ZeroGPU
-    success, msg = ensure_model_loaded()
-    if not success:
-        return None, msg
+    if model is None or tokenizer is None:
+        return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -278,15 +241,12 @@ def analyze_attention(text, layer=15):
     except Exception as e:
         return None, f"❌ Error analyzing attention: {str(e)}"
 
-@spaces.GPU(duration=30)
 def analyze_token_predictions(text):
     """Analyze next token predictions"""
     global model, tokenizer
 
-    # Ensure model is loaded for ZeroGPU
-    success, msg = ensure_model_loaded()
-    if not success:
-        return None, msg
+    if model is None or tokenizer is None:
+        return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -334,15 +294,12 @@ def analyze_token_predictions(text):
     except Exception as e:
         return None, f"❌ Error analyzing predictions: {str(e)}"
 
-@spaces.GPU(duration=30)
 def analyze_layer_evolution(text):
     """Analyze how representations evolve through layers"""
     global model, tokenizer
 
-    # Ensure model is loaded for ZeroGPU
-    success, msg = ensure_model_loaded()
-    if not success:
-        return None, msg
+    if model is None or tokenizer is None:
+        return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     try:
         inputs = tokenizer(text, return_tensors="pt")
@@ -405,15 +362,12 @@ def analyze_layer_evolution(text):
     except Exception as e:
         return None, f"❌ Error analyzing layer evolution: {str(e)}"
 
-@spaces.GPU(duration=30)
 def analyze_weights(layer_num, layer_type):
     """Analyze weight distribution with research-based metrics"""
     global model
 
-    # Ensure model is loaded for ZeroGPU
-    success, msg = ensure_model_loaded()
-    if not success:
-        return None, msg
+    if model is None:
+        return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     try:
         selected_layer = f"model.layers.{layer_num}.{layer_type}"
@@ -856,15 +810,12 @@ def goldfish_loss_function(logits, targets, k=0.1, temperature=1.0):
     else:
         return masked_loss.sum()
 
-@spaces.GPU(duration=30)
 def analyze_memorization_patterns(text, k_values=[0.0, 0.1, 0.2, 0.3]):
     """Analyze how Goldfish Loss affects memorization"""
     global model, tokenizer
 
-    # Ensure model is loaded for ZeroGPU
-    success, msg = ensure_model_loaded()
-    if not success:
-        return None, msg
+    if model is None or tokenizer is None:
+        return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
@@ -1163,15 +1114,12 @@ def simulate_optimizer_comparison(baseline_loss, num_steps):
 # 🧠 DECISION PROCESS & GERMAN LANGUAGE ANALYSIS
 # =============================================================================
 
-@spaces.GPU(duration=30)
 def analyze_decision_process(text, max_steps=10):
     """Step-by-step decision process like CLI script"""
     global model, tokenizer
 
-    # Ensure model is loaded for ZeroGPU
-    success, msg = ensure_model_loaded()
-    if not success:
-        return None, msg
+    if model is None or tokenizer is None:
+        return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     try:
         inputs = tokenizer(text, return_tensors="pt", max_length=256, truncation=True)
@@ -1299,15 +1247,12 @@ def analyze_decision_process(text, max_steps=10):
     except Exception as e:
         return None, f"❌ Error analyzing decision process: {str(e)}"
 
-@spaces.GPU(duration=30)
 def analyze_german_compounds(text_input=""):
     """Analyze German compound words with multi-tokenizer comparison"""
    global model, tokenizer
 
-    # Ensure model is loaded for ZeroGPU
-    success, msg = ensure_model_loaded()
-    if not success:
-        return None, msg
+    if model is None or tokenizer is None:
+        return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
 
     # Swiss/German compound examples if no input
     if not text_input.strip():
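
One detail of the new version worth noting: the identical `if model is None` guard now opens every analysis function. That repetition is the price of the simpler design. A hypothetical decorator, not part of this commit, could factor it out for the functions that return `(data, message)` pairs (`chat_with_apertus` returns a bare string, so it would need its own variant):

```python
from functools import wraps

def requires_model(fn):
    """Reject calls until load_model() has populated the module globals."""
    @wraps(fn)
    def wrapper(*args, **kwargs):
        if model is None or tokenizer is None:
            return None, "❌ Model not loaded. Please wait for initialization or refresh the page."
        return fn(*args, **kwargs)
    return wrapper

@requires_model
def analyze_attention(text, layer=15):
    """Analyze attention patterns (guard handled by the decorator)."""
    ...
```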