#!/usr/bin/env python3
"""
Compare hourly MAE: baseline vs hour-aware adaptive selection.
Loads both forecasts and compares MAE per hour-of-day to measure improvement.
"""
import polars as pl
from pathlib import Path
from datetime import datetime

# Paths
PROJECT_ROOT = Path(__file__).parent.parent
BASELINE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_full_14day.parquet'
HOUR_AWARE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_hour_aware_ACTUAL.parquet'
BASELINE_SUMMARY = PROJECT_ROOT / 'results' / 'september_2025_hourly_summary.csv'  # not read below; kept as a pointer to the baseline summary artifact
OUTPUT_PATH = PROJECT_ROOT / 'results' / 'hourly_mae_comparison.csv'
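
# BASELINE_FORECAST and HOUR_AWARE_FORECAST are assumed to be produced by
# earlier steps of the pipeline; OUTPUT_PATH is written by this script.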

def load_actuals():
    """Load actuals from the HuggingFace dataset."""
    print('[INFO] Loading actuals from HuggingFace dataset...')
    from datasets import load_dataset
    import os

    dataset = load_dataset(
        'evgueni-p/fbmc-features-24month',
        split='train',
        token=os.environ.get('HF_TOKEN'),
    )
    df_actuals_full = pl.from_arrow(dataset.data.table)

    # Filter to the forecast window: September 2-15, 2025 (end bound exclusive)
    forecast_start = datetime(2025, 9, 2)
    forecast_end = datetime(2025, 9, 16)
    df_actuals = df_actuals_full.filter(
        (pl.col('timestamp') >= forecast_start)
        & (pl.col('timestamp') < forecast_end)
    )
    print(f'[INFO] Actuals filtered: {df_actuals.shape[0]} hours')
    return df_actuals
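
# Note: the joins below match on 'timestamp' exactly; this assumes the
# forecast parquets and the HF actuals share the same timestamp dtype/timezone.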

def compute_hourly_mae(df_forecast, df_actuals, label):
    """Compute MAE per hour-of-day for all borders."""
    print(f'[INFO] Computing hourly MAE for {label}...')

    # Extract border names.
    # Hour-aware forecasts use *_adaptive columns; the baseline uses *_median.
    if any(c.endswith('_adaptive') for c in df_forecast.columns):
        col_suffix = '_adaptive'
    else:
        col_suffix = '_median'
    forecast_cols = [col for col in df_forecast.columns if col.endswith(col_suffix)]
    border_names = [col.removesuffix(col_suffix) for col in forecast_cols]
    print(f'[INFO] Using forecast column suffix: {col_suffix}')

    hourly_results = []
    for border in border_names:
        forecast_col = f'{border}{col_suffix}'
        actual_col = f'target_border_{border}'
        if actual_col not in df_actuals.columns:
            continue

        # Create unified dataframe (inner join on shared timestamps)
        df_border = df_forecast.select(['timestamp', forecast_col]).join(
            df_actuals.select(['timestamp', actual_col]),
            on='timestamp',
            how='inner'
        )
        # Add hour-of-day
        df_border = df_border.with_columns([
            pl.col('timestamp').dt.hour().alias('hour')
        ])

        # Compute MAE per hour
        for hour in range(24):
            hour_df = df_border.filter(pl.col('hour') == hour)
            if len(hour_df) == 0:
                continue
            mae = (hour_df[forecast_col] - hour_df[actual_col]).abs().mean()
            hourly_results.append({
                'border': border,
                'hour': hour,
                'mae': mae,
                'n_hours': len(hour_df),
                'version': label
            })

    return pl.DataFrame(hourly_results)
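
# compute_hourly_mae returns a tidy frame: one row per (border, hour), with
# columns ['border', 'hour', 'mae', 'n_hours', 'version'].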

def compare_results(df_baseline_hourly, df_hour_aware_hourly):
    """Compare baseline vs hour-aware hourly MAE."""
    print('\n' + '=' * 80)
    print('HOURLY MAE COMPARISON: Baseline vs Hour-Aware Adaptive Selection')
    print('=' * 80)

    # Aggregate across borders for each version
    baseline_stats = df_baseline_hourly.group_by('hour').agg([
        pl.col('mae').mean().alias('baseline_mae'),
        pl.col('mae').median().alias('baseline_median_mae'),
        pl.col('border').count().alias('n_borders')
    ]).sort('hour')
    hour_aware_stats = df_hour_aware_hourly.group_by('hour').agg([
        pl.col('mae').mean().alias('hour_aware_mae'),
        pl.col('mae').median().alias('hour_aware_median_mae')
    ]).sort('hour')

    # Join for comparison
    comparison = baseline_stats.join(hour_aware_stats, on='hour', how='inner')

    # Calculate improvement; positive values mean hour-aware beat the baseline
    comparison = comparison.with_columns([
        (pl.col('baseline_mae') - pl.col('hour_aware_mae')).alias('mae_reduction'),
        ((pl.col('baseline_mae') - pl.col('hour_aware_mae')) / pl.col('baseline_mae') * 100).alias('improvement_pct')
    ])
    print('\n[INFO] Hour-by-Hour Comparison:')
    print(comparison)

    # Overall statistics
    overall_baseline = df_baseline_hourly['mae'].mean()
    overall_hour_aware = df_hour_aware_hourly['mae'].mean()
    overall_improvement = (overall_baseline - overall_hour_aware) / overall_baseline * 100
    print('\n[INFO] Overall MAE:')
    print(f'  Baseline:    {overall_baseline:.2f} MW')
    print(f'  Hour-Aware:  {overall_hour_aware:.2f} MW')
    print(f'  Improvement: {overall_improvement:.2f}%')

    # Problem hours analysis (15-21)
    problem_hours = [15, 16, 17, 18, 19, 20, 21]
    problem_baseline = comparison.filter(pl.col('hour').is_in(problem_hours))['baseline_mae'].mean()
    problem_hour_aware = comparison.filter(pl.col('hour').is_in(problem_hours))['hour_aware_mae'].mean()
    problem_improvement = (problem_baseline - problem_hour_aware) / problem_baseline * 100
    print('\n[INFO] Problem Hours (15-21) MAE:')
    print(f'  Baseline:    {problem_baseline:.2f} MW')
    print(f'  Hour-Aware:  {problem_hour_aware:.2f} MW')
    print(f'  Improvement: {problem_improvement:.2f}%')

    # Best/worst hours
    print('\n[INFO] Top 5 Most Improved Hours:')
    best_improvements = comparison.sort('improvement_pct', descending=True).head(5)
    print(best_improvements.select(['hour', 'baseline_mae', 'hour_aware_mae', 'improvement_pct']))
    print('\n[INFO] Top 5 Least Improved (or Degraded) Hours:')
    worst_improvements = comparison.sort('improvement_pct').head(5)
    print(worst_improvements.select(['hour', 'baseline_mae', 'hour_aware_mae', 'improvement_pct']))

    # Success criteria check
    print('\n' + '=' * 80)
    if overall_improvement >= 5.0:
        print(f'[SUCCESS] Hour-aware selection achieved {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Proceed to Phase 4: AutoGluon fine-tuning with sample weighting')
    elif overall_improvement >= 3.0:
        print(f'[PARTIAL SUCCESS] {overall_improvement:.1f}% improvement - marginal gain')
        print('[RECOMMENDATION] Consider proceeding to fine-tuning; it may provide larger gains')
    else:
        print(f'[INSUFFICIENT] Only {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Skip to Phase 4: AutoGluon fine-tuning with sample weighting')
    print('=' * 80)
    return comparison
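
# The comparison frame saved by main() has one row per hour-of-day:
#   hour, baseline_mae, baseline_median_mae, n_borders,
#   hour_aware_mae, hour_aware_median_mae, mae_reduction, improvement_pct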

def main():
    """Main comparison workflow."""
    print('[START] Hourly MAE Comparison Analysis')
    print(f'[INFO] Baseline forecast:   {BASELINE_FORECAST}')
    print(f'[INFO] Hour-aware forecast: {HOUR_AWARE_FORECAST}')

    # Load data
    df_actuals = load_actuals()
    print('\n[INFO] Loading baseline forecast...')
    df_baseline = pl.read_parquet(BASELINE_FORECAST)
    print(f'[INFO] Baseline shape: {df_baseline.shape}')
    print('\n[INFO] Loading hour-aware forecast...')
    df_hour_aware = pl.read_parquet(HOUR_AWARE_FORECAST)
    print(f'[INFO] Hour-aware shape: {df_hour_aware.shape}')

    # Compute hourly MAE for both versions
    df_baseline_hourly = compute_hourly_mae(df_baseline, df_actuals, 'baseline')
    df_hour_aware_hourly = compute_hourly_mae(df_hour_aware, df_actuals, 'hour_aware')

    # Compare results
    comparison = compare_results(df_baseline_hourly, df_hour_aware_hourly)

    # Save detailed comparison
    comparison.write_csv(OUTPUT_PATH)
    print(f'\n[INFO] Detailed comparison saved to: {OUTPUT_PATH}')
    print('\n[SUCCESS] Hourly MAE comparison complete!')


if __name__ == '__main__':
    main()