Spaces:
Sleeping
Sleeping
Evgueni Poloukarov
revert: remove hour-aware adaptive quantile selection (61% MAE degradation)
ff9fbcf
| #!/usr/bin/env python3 | |
| """ | |
| Test HF Space with expanded context window (128h -> 2160h). | |
| Validates VRAM usage and forecast variation patterns. | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| import polars as pl | |
| import numpy as np | |
| from gradio_client import Client | |
# The Hugging Face token is read from the environment and later handed to the
# gradio client; with no token available the script stops with exit status 1.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    print("[ERROR] HF_TOKEN environment variable not set")
    raise SystemExit(1)
def _print_hourly_preview(values, hours=48, per_row=12):
    """Print the leading forecast values in labelled rows.

    Args:
        values: 1-D sequence of forecast values.
        hours: Maximum number of leading values to show.
        per_row: Number of values printed on each labelled row.
    """
    shown = min(hours, len(values))
    for start in range(0, shown, per_row):
        row = values[start:min(start + per_row, shown)]
        cells = "".join(f"{value:7.0f} " for value in row)
        print(f" Hour {start:3d}-{start + per_row - 1:3d}: {cells}", end="")
        # Only complete rows are newline-terminated here; a partial last row
        # is closed by the trailing print() below.
        if len(row) == per_row:
            print()
    print()


def _print_summary(checks_passed, checks_failed):
    """Print the validation summary and return True when no check failed.

    Args:
        checks_passed: Descriptions of the checks that passed.
        checks_failed: Descriptions of the checks that failed.

    Returns:
        True if ``checks_failed`` is empty, otherwise False.
    """
    print("\n" + "=" * 80)
    print("SMOKE TEST VALIDATION SUMMARY")
    print("=" * 80)

    print(f"\n[PASSED] {len(checks_passed)} checks:")
    for check in checks_passed:
        print(f" + {check}")

    if checks_failed:
        print(f"\n[FAILED] {len(checks_failed)} checks:")
        for check in checks_failed:
            print(f" - {check}")
        print("\n" + "=" * 80)
        print("[PARTIAL SUCCESS] Some checks failed - investigate before full evaluation")
        print("=" * 80)
        return False

    print("\n" + "=" * 80)
    print("[SUCCESS] ALL CHECKS PASSED - Ready for full 38-border evaluation")
    print("=" * 80)
    print("\nNext steps:")
    print("1. Check HF Space logs for VRAM usage (should be ~76% = 36.6 GB / 48 GB)")
    print("2. Run full 38-border evaluation")
    print("3. Compare to Session 12 baseline (15.92 MW D+1 MAE)")
    return True


def test_hf_space_smoke():
    """Run the smoke forecast on the HF Space and validate the returned file.

    Calls the ``/forecast_api`` endpoint of the ``evgueni-p/fbmc-chronos2``
    Space with a fixed run date and the ``smoke_test`` forecast type, loads
    the parquet file the call returns, and checks the ``AT_DE_median`` column:

    * the forecast has exactly 168 rows,
    * every value is finite,
    * the standard deviation is at least 1.0,
    * there are at least 50 distinct values.

    The column's data type is printed for information only. Note that the
    border name is not sent to the API; it only selects which column of the
    result is validated.

    Returns:
        True if every check passes, False otherwise (including API, file or
        parsing failures, which are reported on stdout).
    """
    print("=" * 80)
    print("HF SPACE SMOKE TEST: Context Window Expansion (128h -> 2160h)")
    print("=" * 80)

    # Initialize client
    print("\nConnecting to HF Space...")
    client = Client("evgueni-p/fbmc-chronos2", hf_token=HF_TOKEN)
    print("[OK] Connected to evgueni-p/fbmc-chronos2")

    # Test parameters
    run_date = "2024-09-30"
    test_border = "AT_DE"
    forecast_type = "smoke_test"  # 7 days, 1 border
    expected_hours = 168
    min_std = 1.0
    min_unique = 50

    print("\nTest configuration:")
    print(f" Border: {test_border}")
    print(f" Run date: {run_date}")
    print(f" Forecast type: {forecast_type}")
    print(" Expected context: 2160 hours (90 days)")
    print(" Expected batch_size: 48")

    # Run forecast
    print("\nRunning forecast via API...")
    try:
        result = client.predict(
            run_date_str=run_date,
            forecast_type=forecast_type,
            api_name="/forecast_api",
        )
    except Exception as e:
        print(f"[FAIL] API call failed: {e}")
        import traceback
        traceback.print_exc()
        return False
    print("[OK] Forecast completed")
    print(f" Result file: {result}")

    # Download and validate forecast
    print("\nValidating forecast results...")
    # os.path.exists() raises TypeError on None or other non-path values, so
    # rule those out first and report them as a missing file.
    if not isinstance(result, (str, os.PathLike)) or not os.path.exists(result):
        print(f"[FAIL] Forecast file not found: {result}")
        return False

    # Load forecast; an unreadable file is a test failure, not a crash.
    try:
        df = pl.read_parquet(result)
    except Exception as e:
        print(f"[FAIL] Could not read forecast file: {e}")
        return False
    print("[OK] Loaded forecast file")
    print(f" Shape: {df.shape}")
    print(f" Columns: {df.columns}")

    # Expected: 168 hours (7 days)
    if len(df) != expected_hours:
        print("[FAIL] Forecast length mismatch:")
        print(f" Expected: {expected_hours} hours")
        print(f" Got: {len(df)} hours")
        return False
    print(f"[OK] Forecast length: {len(df)} hours (correct)")

    # Extract median forecast for the test border
    median_col = f"{test_border}_median"
    if median_col not in df.columns:
        print(f"[FAIL] Column {median_col} not found in forecast")
        return False
    median_forecast = df[median_col].to_numpy()

    # NaN makes every comparison below evaluate to False, which would report
    # "[OK] Forecast shows variation (std=nan MW)"; reject such data up front.
    non_finite = int((~np.isfinite(median_forecast)).sum())
    if non_finite:
        print(f"\n[FAIL] Forecast contains {non_finite} missing or non-finite values")
        return False

    # Check variation statistics
    mean_val = np.mean(median_forecast)
    std_val = np.std(median_forecast)
    min_val = np.min(median_forecast)
    max_val = np.max(median_forecast)
    range_val = max_val - min_val
    unique_values = len(np.unique(median_forecast))

    print("\n[CHECK] Forecast statistics:")
    print(f" Mean: {mean_val:.2f} MW")
    print(f" Std Dev: {std_val:.2f} MW")
    print(f" Min: {min_val:.2f} MW")
    print(f" Max: {max_val:.2f} MW")
    print(f" Range: {range_val:.2f} MW")

    # Validation 1: Check for variation
    if std_val < min_std:
        print(f"\n[WARNING] Low variation detected (std={std_val:.4f} MW)")
        print(f" Unique values in forecast: {unique_values}/{len(median_forecast)}")
        if unique_values < 5:
            print(f"\n[FAIL] Forecast appears constant (only {unique_values} unique values)")
            print(f" First 24 values: {median_forecast[:24]}")
            return False
    else:
        print(f"\n[OK] Forecast shows variation (std={std_val:.2f} MW)")

    # Validation 2: Check unique values count
    print(f"\n[CHECK] Unique values: {unique_values}/{len(median_forecast)}")
    if unique_values < min_unique:
        print("[WARNING] Low diversity (expected >50 unique values)")
    else:
        print("[OK] Good diversity in forecast")

    # Validation 3: Check data type (informational only, never fails the test)
    dtype = df.schema[median_col]
    print(f"\n[CHECK] Data type: {dtype}")
    if "Int" in str(dtype):
        print("[OK] MW values converted to integers")
    else:
        print("[INFO] MW values still float (expected Int32)")

    # Display first 48 hours
    print("\n[CHECK] First 48 hours of median forecast:")
    _print_hourly_preview(median_forecast)

    # Summary. The length check cannot fail here: a mismatch returned above.
    checks_passed = [f"Forecast length ({expected_hours} hours)"]
    checks_failed = []

    if std_val >= min_std:
        checks_passed.append(f"Variation (std={std_val:.2f} MW)")
    else:
        checks_failed.append(f"Low variation (std={std_val:.4f} MW)")

    if unique_values >= min_unique:
        checks_passed.append(f"Diversity ({unique_values} unique values)")
    else:
        checks_failed.append(f"Low diversity ({unique_values} unique values)")

    return _print_summary(checks_passed, checks_failed)
if __name__ == "__main__":
    # Map the boolean test outcome onto the process exit status:
    # 0 when every check passed, 1 otherwise.
    exit_code = 0 if test_hf_space_smoke() else 1
    sys.exit(exit_code)