File size: 7,972 Bytes
ff9fbcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env python3
"""
Compare hourly MAE: baseline vs hour-aware adaptive selection.

Loads both forecasts and compares MAE per hour-of-day to measure improvement.
"""

import polars as pl
import numpy as np
from pathlib import Path
from datetime import datetime

# Paths — all inputs/outputs live under <repo-root>/results, one level above this script.
PROJECT_ROOT = Path(__file__).parent.parent
BASELINE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_full_14day.parquet'  # baseline forecast (read in main)
HOUR_AWARE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_hour_aware_ACTUAL.parquet'  # hour-aware forecast (read in main)
BASELINE_SUMMARY = PROJECT_ROOT / 'results' / 'september_2025_hourly_summary.csv'  # NOTE(review): defined but never referenced below
OUTPUT_PATH = PROJECT_ROOT / 'results' / 'hourly_mae_comparison.csv'  # destination for the comparison table written in main

def load_actuals():
    """Fetch the actuals table from the HuggingFace dataset and trim it to the forecast window.

    Returns a polars DataFrame holding only rows with timestamps in
    [2025-09-02, 2025-09-16), i.e. September 2-15, 2025.
    """
    print('[INFO] Loading actuals from HuggingFace dataset...')
    from datasets import load_dataset
    import os

    # HF_TOKEN is read from the environment; None is acceptable for public datasets.
    dataset = load_dataset('evgueni-p/fbmc-features-24month', split='train', token=os.environ.get('HF_TOKEN'))
    df_actuals_full = pl.from_arrow(dataset.data.table)

    # Evaluation window: September 2-15, 2025 (end bound exclusive).
    window_start = datetime(2025, 9, 2)
    window_end = datetime(2025, 9, 16)
    in_window = (pl.col('timestamp') >= window_start) & (pl.col('timestamp') < window_end)

    df_actuals = df_actuals_full.filter(in_window)

    print(f'[INFO] Actuals filtered: {df_actuals.shape[0]} hours')
    return df_actuals


def compute_hourly_mae(df_forecast, df_actuals, label):
    """Compute MAE per hour-of-day for every border present in both frames.

    Parameters
    ----------
    df_forecast : polars DataFrame with a 'timestamp' column plus per-border
        forecast columns suffixed '_adaptive' (hour-aware run) or '_median'
        (baseline run).
    df_actuals : polars DataFrame with 'timestamp' plus 'target_border_<name>'
        actual columns.
    label : str
        Tag stored in the output's 'version' column (e.g. 'baseline').

    Returns
    -------
    polars DataFrame with columns: border, hour, mae, n_hours, version.
    """
    print(f'[INFO] Computing hourly MAE for {label}...')

    # Detect which forecast flavor this frame holds.
    # FIX: the original also substring-checked df_forecast.columns[0], which
    # could select the adaptive branch when no column actually ends with
    # '_adaptive', silently producing an empty result. The suffix scan alone
    # is sufficient and robust.
    if any(c.endswith('_adaptive') for c in df_forecast.columns):
        col_suffix = '_adaptive'
    else:
        col_suffix = '_median'
    forecast_cols = [col for col in df_forecast.columns if col.endswith(col_suffix)]
    # FIX: removesuffix strips only the trailing suffix, unlike str.replace
    # which would also remove interior occurrences of the pattern.
    border_names = [col.removesuffix(col_suffix) for col in forecast_cols]

    print(f'[INFO] Using forecast column suffix: {col_suffix}')

    hourly_results = []

    for border in border_names:
        forecast_col = f'{border}{col_suffix}'
        actual_col = f'target_border_{border}'

        # Skip borders that have no ground-truth column.
        if actual_col not in df_actuals.columns:
            continue

        # Align forecast and actuals on timestamp; inner join drops hours
        # missing from either side.
        df_border = df_forecast.select(['timestamp', forecast_col]).join(
            df_actuals.select(['timestamp', actual_col]),
            on='timestamp',
            how='inner'
        )

        # Derive the hour-of-day bucket for grouping.
        df_border = df_border.with_columns([
            pl.col('timestamp').dt.hour().alias('hour')
        ])

        # One MAE value per hour-of-day (0-23); empty buckets are skipped.
        for hour in range(24):
            hour_df = df_border.filter(pl.col('hour') == hour)

            if len(hour_df) == 0:
                continue

            mae = (hour_df[forecast_col] - hour_df[actual_col]).abs().mean()

            hourly_results.append({
                'border': border,
                'hour': hour,
                'mae': mae,
                'n_hours': len(hour_df),
                'version': label
            })

    return pl.DataFrame(hourly_results)


def compare_results(df_baseline_hourly, df_hour_aware_hourly):
    """Print a baseline-vs-hour-aware MAE report and return the per-hour comparison table."""
    separator = '=' * 80
    print('\n' + separator)
    print('HOURLY MAE COMPARISON: Baseline vs Hour-Aware Adaptive Selection')
    print(separator)

    # Collapse the per-border rows into one row per hour-of-day, per version.
    per_hour_baseline = (
        df_baseline_hourly
        .group_by('hour')
        .agg([
            pl.col('mae').mean().alias('baseline_mae'),
            pl.col('mae').median().alias('baseline_median_mae'),
            pl.col('border').count().alias('n_borders'),
        ])
        .sort('hour')
    )
    per_hour_adaptive = (
        df_hour_aware_hourly
        .group_by('hour')
        .agg([
            pl.col('mae').mean().alias('hour_aware_mae'),
            pl.col('mae').median().alias('hour_aware_median_mae'),
        ])
        .sort('hour')
    )

    # Side-by-side table plus absolute and relative improvement columns.
    delta = pl.col('baseline_mae') - pl.col('hour_aware_mae')
    comparison = (
        per_hour_baseline
        .join(per_hour_adaptive, on='hour', how='inner')
        .with_columns([
            delta.alias('mae_reduction'),
            (delta / pl.col('baseline_mae') * 100).alias('improvement_pct'),
        ])
    )

    print('\n[INFO] Hour-by-Hour Comparison:')
    print(comparison)

    # Overall statistics across every (border, hour) cell.
    overall_baseline = df_baseline_hourly['mae'].mean()
    overall_hour_aware = df_hour_aware_hourly['mae'].mean()
    overall_improvement = (overall_baseline - overall_hour_aware) / overall_baseline * 100

    print(f'\n[INFO] Overall MAE:')
    print(f'  Baseline:    {overall_baseline:.2f} MW')
    print(f'  Hour-Aware:  {overall_hour_aware:.2f} MW')
    print(f'  Improvement: {overall_improvement:.2f}%')

    # Focused view on the known-problematic late-afternoon/evening hours.
    problem_hours = [15, 16, 17, 18, 19, 20, 21]
    problem_rows = comparison.filter(pl.col('hour').is_in(problem_hours))
    problem_baseline = problem_rows['baseline_mae'].mean()
    problem_hour_aware = problem_rows['hour_aware_mae'].mean()
    problem_improvement = (problem_baseline - problem_hour_aware) / problem_baseline * 100

    print(f'\n[INFO] Problem Hours (15-21) MAE:')
    print(f'  Baseline:    {problem_baseline:.2f} MW')
    print(f'  Hour-Aware:  {problem_hour_aware:.2f} MW')
    print(f'  Improvement: {problem_improvement:.2f}%')

    display_cols = ['hour', 'baseline_mae', 'hour_aware_mae', 'improvement_pct']

    print('\n[INFO] Top 5 Most Improved Hours:')
    print(comparison.sort('improvement_pct', descending=True).head(5).select(display_cols))

    print('\n[INFO] Top 5 Least Improved (or Degraded) Hours:')
    print(comparison.sort('improvement_pct').head(5).select(display_cols))

    # Verdict against the stated 5-10% improvement target.
    print('\n' + separator)
    if overall_improvement >= 5.0:
        print(f'[SUCCESS] Hour-aware selection achieved {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Proceed to Phase 4: AutoGluon fine-tuning with sample weighting')
    elif overall_improvement >= 3.0:
        print(f'[PARTIAL SUCCESS] {overall_improvement:.1f}% improvement - marginal gain')
        print('[RECOMMENDATION] Consider proceeding to fine-tuning, may provide larger gains')
    else:
        print(f'[INSUFFICIENT] Only {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Skip to Phase 4: AutoGluon fine-tuning with sample weighting')
    print(separator)

    return comparison


def main():
    """Run the end-to-end hourly MAE comparison and persist the result table."""
    print('[START] Hourly MAE Comparison Analysis')
    print(f'[INFO] Baseline forecast: {BASELINE_FORECAST}')
    print(f'[INFO] Hour-aware forecast: {HOUR_AWARE_FORECAST}')

    # Ground truth is shared by both forecast variants.
    df_actuals = load_actuals()

    print(f'\n[INFO] Loading baseline forecast...')
    df_baseline = pl.read_parquet(BASELINE_FORECAST)
    print(f'[INFO] Baseline shape: {df_baseline.shape}')

    print(f'\n[INFO] Loading hour-aware forecast...')
    df_hour_aware = pl.read_parquet(HOUR_AWARE_FORECAST)
    print(f'[INFO] Hour-aware shape: {df_hour_aware.shape}')

    # Per-hour error tables for each variant, then the head-to-head report.
    comparison = compare_results(
        compute_hourly_mae(df_baseline, df_actuals, 'baseline'),
        compute_hourly_mae(df_hour_aware, df_actuals, 'hour_aware'),
    )

    # Persist the detailed per-hour table alongside the other results.
    comparison.write_csv(OUTPUT_PATH)
    print(f'\n[INFO] Detailed comparison saved to: {OUTPUT_PATH}')

    print('\n[SUCCESS] Hourly MAE comparison complete!')


if __name__ == '__main__':
    # Guard so importing this module does not trigger the full comparison run.
    main()