#!/usr/bin/env python3
"""
October 2024 Evaluation - Multivariate Chronos-2
Run date: 2024-09-30 (forecast Oct 1-14, 2024)
Compares 38-border × 14-day forecast against actuals
Calculates D+1 through D+14 MAE for each border
"""
import sys
sys.stdout.reconfigure(encoding='utf-8', errors='replace')
import os
import time
import numpy as np
import polars as pl
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
from gradio_client import Client
load_dotenv()
def main():
    """Run the October 2024 multivariate Chronos-2 evaluation end-to-end.

    Steps:
      1. Connect to the HuggingFace Space (requires HF_TOKEN in the env).
      2. Request the full 14-day, 38-border forecast (run date 2024-09-30).
      3. Load the returned forecast parquet.
      4. Load actuals for Oct 1-14, 2024 from the local unified dataset.
      5. Compute overall MAE/RMSE and per-day MAE (D+1..D+14) per border.
      6. Print a summary and write CSV + text report under results/.

    Exits with status 1 when the local dataset is missing, the October
    actuals are absent, or no border produces a valid result.
    """
    print("="*70)
    print("OCTOBER 2024 MULTIVARIATE CHRONOS-2 EVALUATION")
    print("="*70)
    total_start = time.time()

    # Step 1: Connect to HF Space
    print("\n[1/6] Connecting to HuggingFace Space...")
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment")
    client = Client("evgueni-p/fbmc-chronos2", hf_token=hf_token)
    print("[OK] Connected to HF Space")

    # Step 2: Run full 14-day forecast for Oct 1-14, 2024
    print("\n[2/6] Running full 38-border forecast via HF Space API...")
    print(" Run date: 2024-09-30")
    print(" Forecast period: Oct 1-14, 2024 (336 hours)")
    print(" This may take 5-10 minutes...")
    forecast_start_time = time.time()
    result_file = client.predict(
        "2024-09-30",  # run_date
        "full_14day",  # forecast_type
    )
    forecast_time = time.time() - forecast_start_time
    print(f"[OK] Forecast complete in {forecast_time/60:.2f} minutes")
    print(f" Result file: {result_file}")

    # Step 3: Load forecast results
    print("\n[3/6] Loading forecast results...")
    forecast_df = pl.read_parquet(result_file)
    print(f"[OK] Loaded forecast with shape: {forecast_df.shape}")
    print(f" Columns: {len(forecast_df.columns)} (timestamp + {len(forecast_df.columns)-1} forecast columns)")
    # Identify border columns: one '<border>_median' column per border.
    median_cols = [col for col in forecast_df.columns if col.endswith('_median')]
    borders = [col.replace('_median', '') for col in median_cols]
    print(f"[OK] Found {len(borders)} borders")

    # Step 4: Load actuals from local dataset
    print("\n[4/6] Loading actual values from local dataset...")
    local_data_path = Path('data/processed/features_unified_24month.parquet')
    if not local_data_path.exists():
        print(f"[ERROR] Local dataset not found at: {local_data_path}")
        sys.exit(1)
    df = pl.read_parquet(local_data_path)
    print(f"[OK] Loaded dataset: {len(df)} rows")
    print(f" Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")

    # Extract October 1-14, 2024 actuals (inclusive of the final hour 23:00).
    oct_start = datetime(2024, 10, 1, 0, 0, 0)
    oct_end = datetime(2024, 10, 14, 23, 0, 0)
    actual_df = df.filter(
        (pl.col('timestamp') >= oct_start) &
        (pl.col('timestamp') <= oct_end)
    )
    if len(actual_df) == 0:
        print("[ERROR] No actual data found for October 2024!")
        print(" Dataset may not contain October 2024 data.")
        print(" Available date range in dataset:")
        print(f" {df['timestamp'].min()} to {df['timestamp'].max()}")
        sys.exit(1)
    print(f"[OK] Extracted {len(actual_df)} hours of actual values")

    # Step 5: Calculate metrics for each border
    print(f"\n[5/6] Calculating MAE metrics for {len(borders)} borders...")
    print(" Progress:")
    results = []
    for i, border in enumerate(borders, 1):
        # Forecast column (median) must be present for this border.
        forecast_col = f"{border}_median"
        if forecast_col not in forecast_df.columns:
            print(f" [{i:2d}/{len(borders)}] {border:15s} - SKIPPED (no forecast)")
            continue
        # Actuals column must be present too.
        target_col = f'target_border_{border}'
        if target_col not in actual_df.columns:
            print(f" [{i:2d}/{len(borders)}] {border:15s} - SKIPPED (no actuals)")
            continue
        # Left-join forecast with actuals on timestamp; rows with missing
        # actuals are dropped in the validity filter below.
        merged = forecast_df.select(['timestamp', forecast_col]).join(
            actual_df.select(['timestamp', target_col]),
            on='timestamp',
            how='left'
        )
        valid_data = merged.filter(
            pl.col(forecast_col).is_not_null() &
            pl.col(target_col).is_not_null()
        )
        if len(valid_data) == 0:
            print(f" [{i:2d}/{len(borders)}] {border:15s} - SKIPPED (no valid data)")
            continue
        # Validation: expect exactly 336 hours (14 days x 24 hours).
        if len(valid_data) != 336:
            print(f" [{i:2d}/{len(borders)}] {border:15s} - WARNING: Only {len(valid_data)}/336 hours (missing {336-len(valid_data)} hours)")
        # Overall metrics across all valid hours.
        mae = (valid_data[forecast_col] - valid_data[target_col]).abs().mean()
        rmse = ((valid_data[forecast_col] - valid_data[target_col])**2).mean()**0.5
        # Validation: near-zero MAE usually means the target leaked into features.
        if mae < 0.001:
            print(f" [{i:2d}/{len(borders)}] {border:15s} - WARNING: Suspiciously low MAE = {mae:.2e} MW (possible data leakage)")
        # Per-day MAE (D+1 through D+14); NaN for days with no valid hours.
        per_day_mae = []
        for day in range(1, 15):
            day_start = oct_start + timedelta(days=day - 1)
            day_end = day_start + timedelta(days=1) - timedelta(hours=1)
            day_data = valid_data.filter(
                (pl.col('timestamp') >= day_start) &
                (pl.col('timestamp') <= day_end)
            )
            if len(day_data) > 0:
                day_mae = (day_data[forecast_col] - day_data[target_col]).abs().mean()
                per_day_mae.append(day_mae)
            else:
                per_day_mae.append(np.nan)
        # Assemble the per-border result row.
        result_dict = {
            'border': border,
            'mae_overall': mae,
            'rmse_overall': rmse,
            'n_hours': len(valid_data),
        }
        for day_idx in range(14):
            day_num = day_idx + 1
            result_dict[f'mae_d{day_num}'] = per_day_mae[day_idx] if len(per_day_mae) > day_idx else np.nan
        # Validation: a flat MAE profile across days suggests a constant forecast.
        valid_mae_values = [m for m in per_day_mae if not np.isnan(m)]
        if len(valid_mae_values) >= 5:  # need at least 5 days to judge
            mae_std = np.std(valid_mae_values)
            mae_mean = np.mean(valid_mae_values)
            if mae_std < 0.01 and mae_mean > 0:  # essentially zero variation but non-zero MAE
                print(f" [{i:2d}/{len(borders)}] {border:15s} - WARNING: Constant forecast detected (MAE std={mae_std:.2e})")
        results.append(result_dict)
        # Per-border status line keyed on the D+1 MAE.
        d1_mae = per_day_mae[0] if len(per_day_mae) > 0 else np.inf
        status = "[OK]" if d1_mae <= 150 else "[!]"
        print(f" [{i:2d}/{len(borders)}] {border:15s} - D+1 MAE: {d1_mae:6.1f} MW {status}")

    # Step 6: Summary statistics
    print("\n[6/6] Generating summary report...")
    if not results:
        print("[ERROR] No results to summarize")
        sys.exit(1)
    results_df = pl.DataFrame(results)
    # D+1 summary statistics (primary metric).
    mean_mae_d1 = results_df['mae_d1'].mean()
    median_mae_d1 = results_df['mae_d1'].median()
    min_mae_d1 = results_df['mae_d1'].min()
    max_mae_d1 = results_df['mae_d1'].max()

    # Save results to CSV (parents=True so a missing intermediate dir
    # does not raise FileNotFoundError).
    output_file = Path('results/october_2024_multivariate.csv')
    output_file.parent.mkdir(parents=True, exist_ok=True)
    results_df.write_csv(output_file)
    print(f"[OK] Results saved to: {output_file}")

    # Summary report to stdout.
    print("\n" + "="*70)
    print("EVALUATION RESULTS SUMMARY - OCTOBER 2024")
    print("="*70)
    print(f"\nBorders evaluated: {len(results)}/{len(borders)}")
    print(f"Total forecast time: {forecast_time/60:.2f} minutes")
    print(f"Total evaluation time: {(time.time() - total_start)/60:.2f} minutes")
    print(f"\n*** D+1 MAE (PRIMARY METRIC) ***")
    print(f"Mean: {mean_mae_d1:.2f} MW (Target: [<=]134 MW)")
    print(f"Median: {median_mae_d1:.2f} MW")
    print(f"Min: {min_mae_d1:.2f} MW")
    print(f"Max: {max_mae_d1:.2f} MW")

    # Target achievement.
    below_target = (results_df['mae_d1'] <= 150).sum()
    print(f"\n*** TARGET ACHIEVEMENT ***")
    print(f"Borders with D+1 MAE [<=]150 MW: {below_target}/{len(results)} ({below_target/len(results)*100:.1f}%)")

    # Best and worst performers by D+1 MAE.
    print(f"\n*** TOP 5 BEST PERFORMERS (Lowest D+1 MAE) ***")
    best = results_df.sort('mae_d1').head(5)
    for row in best.iter_rows(named=True):
        print(f" {row['border']:15s}: D+1 MAE={row['mae_d1']:6.1f} MW, Overall MAE={row['mae_overall']:6.1f} MW")
    print(f"\n*** TOP 5 WORST PERFORMERS (Highest D+1 MAE) ***")
    worst = results_df.sort('mae_d1', descending=True).head(5)
    for row in worst.iter_rows(named=True):
        print(f" {row['border']:15s}: D+1 MAE={row['mae_d1']:6.1f} MW, Overall MAE={row['mae_overall']:6.1f} MW")

    # MAE degradation over the forecast horizon relative to the D+1 baseline.
    print(f"\n*** MAE DEGRADATION OVER FORECAST HORIZON (ALL 14 DAYS) ***")
    for day in range(1, 15):
        col_name = f'mae_d{day}'
        mean_mae_day = results_df[col_name].mean()
        delta = (mean_mae_day - mean_mae_d1) if day > 1 else 0
        delta_pct = (delta / mean_mae_d1 * 100) if day > 1 and mean_mae_d1 > 0 else 0
        if day == 1:
            print(f"D+{day:2d}: {mean_mae_day:6.2f} MW (baseline)")
        else:
            # '+' format spec renders the sign correctly for negative deltas
            # (the old '+{...}' literal printed '+-3.00' for improvements).
            print(f"D+{day:2d}: {mean_mae_day:6.2f} MW ({delta:+6.2f} MW, {delta_pct:+6.1f}%)")

    # Final verdict against the 134 MW / 150 MW thresholds.
    print("\n" + "="*70)
    if mean_mae_d1 <= 134:
        print("[OK] TARGET ACHIEVED! Mean D+1 MAE [<=]134 MW")
        print(" Zero-shot multivariate forecasting successful!")
    elif mean_mae_d1 <= 150:
        print("[~] CLOSE TO TARGET. Mean D+1 MAE [<=]150 MW")
        print(" Zero-shot baseline established. Fine-tuning recommended.")
    else:
        print(f"[!] TARGET NOT MET. Mean D+1 MAE: {mean_mae_d1:.2f} MW (Target: [<=]134 MW)")
        print(" Fine-tuning strongly recommended for Phase 2")
    print("="*70)

    # Save summary report alongside the CSV.
    report_file = Path('results/october_2024_evaluation_report.txt')
    report_file.parent.mkdir(parents=True, exist_ok=True)
    with open(report_file, 'w', encoding='utf-8', errors='replace') as f:
        f.write("="*70 + "\n")
        f.write("OCTOBER 2024 MULTIVARIATE CHRONOS-2 EVALUATION REPORT\n")
        f.write("="*70 + "\n\n")
        f.write(f"Run date: 2024-09-30\n")
        f.write(f"Forecast period: Oct 1-14, 2024 (336 hours)\n")
        f.write(f"Model: amazon/chronos-2 (multivariate, 615 features)\n")
        f.write(f"Borders evaluated: {len(results)}/{len(borders)}\n")
        f.write(f"Forecast time: {forecast_time/60:.2f} minutes\n\n")
        f.write(f"D+1 MAE RESULTS:\n")
        f.write(f" Mean: {mean_mae_d1:.2f} MW\n")
        f.write(f" Median: {median_mae_d1:.2f} MW\n")
        f.write(f" Min: {min_mae_d1:.2f} MW\n")
        f.write(f" Max: {max_mae_d1:.2f} MW\n\n")
        f.write(f"Target achievement: {below_target}/{len(results)} borders with MAE [<=]150 MW\n\n")
        if mean_mae_d1 <= 134:
            f.write("[OK] TARGET ACHIEVED!\n")
        else:
            f.write(f"[!] Target not met - Fine-tuning recommended\n")
    print(f"\n[OK] Summary report saved to: {report_file}")
    print(f"\nTotal evaluation time: {(time.time() - total_start)/60:.1f} minutes")
# Script entry point: run the evaluation only when executed directly,
# not when imported. (Indentation of the call restored — the flattened
# paste left main() at column 0, a syntax error.)
if __name__ == '__main__':
    main()