Evgueni Poloukarov
revert: remove hour-aware adaptive quantile selection (61% MAE degradation)
ff9fbcf
#!/usr/bin/env python3
"""
Compare hourly MAE: baseline vs hour-aware adaptive selection.
Loads both forecasts and compares MAE per hour-of-day to measure improvement.
"""
import polars as pl
from pathlib import Path
from datetime import datetime

# Paths
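# NOTE: parent.parent assumes this script lives one level below the project
# root (e.g. in a scripts/ directory).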
PROJECT_ROOT = Path(__file__).parent.parent
BASELINE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_full_14day.parquet'
HOUR_AWARE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_hour_aware_ACTUAL.parquet'
BASELINE_SUMMARY = PROJECT_ROOT / 'results' / 'september_2025_hourly_summary.csv'
OUTPUT_PATH = PROJECT_ROOT / 'results' / 'hourly_mae_comparison.csv'


def load_actuals():
    """Load actuals from HuggingFace dataset."""
    print('[INFO] Loading actuals from HuggingFace dataset...')
    from datasets import load_dataset
    import os
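    # token resolves to None when HF_TOKEN is unset, in which case
    # load_dataset falls back to cached credentials or anonymous access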
    dataset = load_dataset('evgueni-p/fbmc-features-24month', split='train', token=os.environ.get('HF_TOKEN'))
    df_actuals_full = pl.from_arrow(dataset.data.table)
    # Filter to September 2-15, 2025 (end bound exclusive)
    forecast_start = datetime(2025, 9, 2)
    forecast_end = datetime(2025, 9, 16)
    df_actuals = df_actuals_full.filter(
        (pl.col('timestamp') >= forecast_start) &
        (pl.col('timestamp') < forecast_end)
    )
    print(f'[INFO] Actuals filtered: {df_actuals.shape[0]} hours')
    return df_actuals


def compute_hourly_mae(df_forecast, df_actuals, label):
    """Compute MAE per hour-of-day for all borders."""
    print(f'[INFO] Computing hourly MAE for {label}...')
    # Extract border names.
    # Hour-aware forecasts use *_adaptive columns; the baseline uses *_median.
    if any(c.endswith('_adaptive') for c in df_forecast.columns):
        forecast_cols = [col for col in df_forecast.columns if col.endswith('_adaptive')]
        border_names = [col.replace('_adaptive', '') for col in forecast_cols]
        col_suffix = '_adaptive'
    else:
        forecast_cols = [col for col in df_forecast.columns if col.endswith('_median')]
        border_names = [col.replace('_median', '') for col in forecast_cols]
        col_suffix = '_median'
    print(f'[INFO] Using forecast column suffix: {col_suffix}')
    hourly_results = []
    for border in border_names:
        forecast_col = f'{border}{col_suffix}'
        actual_col = f'target_border_{border}'
        if actual_col not in df_actuals.columns:
            continue
        # Inner-join forecast and actuals on timestamp so only overlapping hours are scored
        df_border = df_forecast.select(['timestamp', forecast_col]).join(
            df_actuals.select(['timestamp', actual_col]),
            on='timestamp',
            how='inner'
        )
        # Add hour-of-day
        df_border = df_border.with_columns([
            pl.col('timestamp').dt.hour().alias('hour')
        ])
        # Compute MAE per hour
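        # (Vectorized equivalent of the loop below, for reference:
        #   df_border.group_by('hour').agg(
        #       (pl.col(forecast_col) - pl.col(actual_col)).abs().mean().alias('mae'),
        #       pl.len().alias('n_hours')))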
        for hour in range(24):
            hour_df = df_border.filter(pl.col('hour') == hour)
            if len(hour_df) == 0:
                continue
            mae = (hour_df[forecast_col] - hour_df[actual_col]).abs().mean()
            hourly_results.append({
                'border': border,
                'hour': hour,
                'mae': mae,
                'n_hours': len(hour_df),
                'version': label
            })
    return pl.DataFrame(hourly_results)


def compare_results(df_baseline_hourly, df_hour_aware_hourly):
    """Compare baseline vs hour-aware hourly MAE."""
    print('\n' + '='*80)
    print('HOURLY MAE COMPARISON: Baseline vs Hour-Aware Adaptive Selection')
    print('='*80)
    # Aggregate across borders for each version
    baseline_stats = df_baseline_hourly.group_by('hour').agg([
        pl.col('mae').mean().alias('baseline_mae'),
        pl.col('mae').median().alias('baseline_median_mae'),
        pl.col('border').count().alias('n_borders')
    ]).sort('hour')
    hour_aware_stats = df_hour_aware_hourly.group_by('hour').agg([
        pl.col('mae').mean().alias('hour_aware_mae'),
        pl.col('mae').median().alias('hour_aware_median_mae')
    ]).sort('hour')
    # Join for comparison
    comparison = baseline_stats.join(hour_aware_stats, on='hour', how='inner')
    # Calculate improvement
    comparison = comparison.with_columns([
        (pl.col('baseline_mae') - pl.col('hour_aware_mae')).alias('mae_reduction'),
        ((pl.col('baseline_mae') - pl.col('hour_aware_mae')) / pl.col('baseline_mae') * 100).alias('improvement_pct')
    ])
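    # improvement_pct > 0 means hour-aware beats the baseline for that hour;
    # negative values indicate degradation.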
    print('\n[INFO] Hour-by-Hour Comparison:')
    print(comparison)
    # Overall statistics
    overall_baseline = df_baseline_hourly['mae'].mean()
    overall_hour_aware = df_hour_aware_hourly['mae'].mean()
    overall_improvement = (overall_baseline - overall_hour_aware) / overall_baseline * 100
    print('\n[INFO] Overall MAE:')
    print(f'  Baseline:    {overall_baseline:.2f} MW')
    print(f'  Hour-Aware:  {overall_hour_aware:.2f} MW')
    print(f'  Improvement: {overall_improvement:.2f}%')
    # Problem hours analysis (15-21)
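    # The late-afternoon/evening window is scored separately so a small overall
    # change cannot mask degradation in the hours the hour-aware selection
    # targets.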
    problem_hours = [15, 16, 17, 18, 19, 20, 21]
    problem_baseline = comparison.filter(pl.col('hour').is_in(problem_hours))['baseline_mae'].mean()
    problem_hour_aware = comparison.filter(pl.col('hour').is_in(problem_hours))['hour_aware_mae'].mean()
    problem_improvement = (problem_baseline - problem_hour_aware) / problem_baseline * 100
    print('\n[INFO] Problem Hours (15-21) MAE:')
    print(f'  Baseline:    {problem_baseline:.2f} MW')
    print(f'  Hour-Aware:  {problem_hour_aware:.2f} MW')
    print(f'  Improvement: {problem_improvement:.2f}%')
    # Best/worst hours
    print('\n[INFO] Top 5 Most Improved Hours:')
    best_improvements = comparison.sort('improvement_pct', descending=True).head(5)
    print(best_improvements.select(['hour', 'baseline_mae', 'hour_aware_mae', 'improvement_pct']))
    print('\n[INFO] Top 5 Least Improved (or Degraded) Hours:')
    worst_improvements = comparison.sort('improvement_pct').head(5)
    print(worst_improvements.select(['hour', 'baseline_mae', 'hour_aware_mae', 'improvement_pct']))
    # Success criteria check
    print('\n' + '='*80)
    if overall_improvement >= 5.0:
        print(f'[SUCCESS] Hour-aware selection achieved {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Proceed to Phase 4: AutoGluon fine-tuning with sample weighting')
    elif overall_improvement >= 3.0:
        print(f'[PARTIAL SUCCESS] {overall_improvement:.1f}% improvement - marginal gain')
        print('[RECOMMENDATION] Consider proceeding to fine-tuning; it may provide larger gains')
    else:
        print(f'[INSUFFICIENT] Only {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Skip to Phase 4: AutoGluon fine-tuning with sample weighting')
    print('='*80)
    return comparison


def main():
    """Main comparison workflow."""
    print('[START] Hourly MAE Comparison Analysis')
    print(f'[INFO] Baseline forecast:   {BASELINE_FORECAST}')
    print(f'[INFO] Hour-aware forecast: {HOUR_AWARE_FORECAST}')
    # Load data
    df_actuals = load_actuals()
    print('\n[INFO] Loading baseline forecast...')
    df_baseline = pl.read_parquet(BASELINE_FORECAST)
    print(f'[INFO] Baseline shape: {df_baseline.shape}')
    print('\n[INFO] Loading hour-aware forecast...')
    df_hour_aware = pl.read_parquet(HOUR_AWARE_FORECAST)
    print(f'[INFO] Hour-aware shape: {df_hour_aware.shape}')
    # Compute hourly MAE for both
    df_baseline_hourly = compute_hourly_mae(df_baseline, df_actuals, 'baseline')
    df_hour_aware_hourly = compute_hourly_mae(df_hour_aware, df_actuals, 'hour_aware')
    # Compare results
    comparison = compare_results(df_baseline_hourly, df_hour_aware_hourly)
    # Save detailed comparison
    comparison.write_csv(OUTPUT_PATH)
    print(f'\n[INFO] Detailed comparison saved to: {OUTPUT_PATH}')
    print('\n[SUCCESS] Hourly MAE comparison complete!')


if __name__ == '__main__':
    main()