Evgueni Poloukarov and Claude committed · ff9fbcf
Parent(s): 31352ec
revert: remove hour-aware adaptive quantile selection (61% MAE degradation)
Experiment Results:
- Hour-aware selection: 761 MW MAE
- Baseline (median): 472 MW MAE
- Degradation: 61% higher MAE (worse, not better)
Root Cause:
- Mathematical error: q50 (the median) ALWAYS minimizes MAE
- Using q75 when "uncertainty is high" increases MAE instead of reducing it
- Post-hoc quantile selection therefore cannot improve MAE
Key Learning:
- Hourly accuracy cannot be improved by varying quantile selection after inference
- The solution must be in the TRAINING process (AutoGluon with sample weighting)
- Next: fine-tune with sample_weight_column to prioritize problem hours
Reverted Changes:
- Removed _apply_adaptive_selection() method
- Removed call to adaptive selection in run_forecast()
- Back to baseline: always use q50 (median) for all hours
Co-Authored-By: Claude <[email protected]>
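
A minimal, self-contained sketch of the root cause named above: the median is the point forecast that minimizes expected absolute error, so substituting q75 on "uncertain" hours can only raise MAE on average. Illustrative only, not part of the commit; the gamma distribution is an arbitrary stand-in for skewed MW flow values.

import numpy as np

rng = np.random.default_rng(0)
actuals = rng.gamma(shape=2.0, scale=1500.0, size=100_000)  # skewed, flow-like MW values

for q in (0.25, 0.50, 0.75, 0.90):
    point = np.quantile(actuals, q)        # constant point forecast pinned at quantile q
    mae = np.abs(actuals - point).mean()   # MAE of that forecast against the samples
    print(f"q{int(q * 100):02d} forecast -> MAE {mae:8.1f} MW")
# q50 prints the smallest MAE; every other quantile is worse, matching the revert rationale.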
Files changed:
- .claude/settings.local.json +3 -1
- CUsersevgueprojectsfbmc_chronos2app.py +161 -0
- notebooks/__marimo__/october_2024_evaluation.html +0 -0
- scripts/compare_hourly_mae.py +208 -0
- scripts/test_hf_space_context_expansion.py +197 -0
- src/forecasting/chronos_inference.py +0 -115
- temp_analysis.txt +57 -0
- temp_final_summary.txt +148 -0
- temp_lta_analysis.txt +59 -0
- temp_raw_analysis.txt +5 -0
.claude/settings.local.json CHANGED
@@ -51,7 +51,9 @@
       "Bash(xargs ls:*)",
       "Bash(pgrep:*)",
       "Bash(test:*)",
-      "WebFetch(domain:jupyter-docker-stacks.readthedocs.io)"
+      "WebFetch(domain:jupyter-docker-stacks.readthedocs.io)",
+      "Bash(copy \"C:\\Users\\evgue\\AppData\\Local\\Temp\\gradio\\58600aa56842336ec8e6dd5758b4c36ada20b58f80a94df386830737cd693772\\forecast_2025-09-01_full_14day.parquet\" resultsseptember_2025_forecast_hour_aware.parquet)",
+      "Bash(cmd /c copy \"C:\\Users\\evgue\\AppData\\Local\\Temp\\gradio\\58600aa56842336ec8e6dd5758b4c36ada20b58f80a94df386830737cd693772\\forecast_2025-09-01_full_14day.parquet\" \"C:\\Users\\evgue\\projects\\fbmc_chronos2\\results\\september_2025_forecast_hour_aware.parquet\")"
     ],
     "deny": [],
     "ask": [],
CUsersevgueprojectsfbmc_chronos2app.py ADDED
@@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""
FBMC Chronos-2 Forecasting API
HuggingFace Space Gradio Interface
Version: 1.0.2 (fixed memory fragmentation - expandable_segments)
"""

# CRITICAL: Set PyTorch memory allocator config BEFORE any imports
# This prevents memory fragmentation issues that cause OOM even with sufficient free memory
# Must be set before torch is imported the first time (including via gradio or other dependencies)
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

import sys
print(f"[STARTUP] Python version: {sys.version}", flush=True)
print(f"[STARTUP] Python path: {sys.path[:3]}", flush=True)
print(f"[STARTUP] PyTorch memory config: {os.environ.get('PYTORCH_CUDA_ALLOC_CONF')}", flush=True)

import gradio as gr
from datetime import datetime

print("[STARTUP] Basic imports successful", flush=True)

try:
    from src.forecasting.chronos_inference import run_inference
    print("[STARTUP] chronos_inference import successful", flush=True)
except Exception as e:
    print(f"[ERROR] Failed to import chronos_inference: {e}", flush=True)
    import traceback
    traceback.print_exc()
    run_inference = None


# Global configuration
FORECAST_TYPES = {
    "smoke_test": "Smoke Test (1 border × 7 days)",
    "full_14day": "Full Forecast (All borders × 14 days)"
}

print("[STARTUP] Configuration loaded", flush=True)


def forecast_api(run_date_str, forecast_type):
    """
    API endpoint for triggering forecasts.

    Args:
        run_date_str: Date in YYYY-MM-DD format
        forecast_type: 'smoke_test' or 'full_14day'

    Returns:
        Path to downloadable forecast results file
    """
    try:
        # Validate run date
        run_date = datetime.strptime(run_date_str, "%Y-%m-%d")

        # Run inference
        result_path = run_inference(
            run_date=run_date_str,
            forecast_type=forecast_type,
            output_dir="/tmp"
        )

        return result_path

    except Exception as e:
        error_msg = f"Error: {str(e)}"
        print(error_msg)
        # Return error message as text file
        error_path = "/tmp/error.txt"
        with open(error_path, 'w') as f:
            f.write(error_msg)
        return error_path


# Build Gradio interface
with gr.Blocks(title="FBMC Chronos-2 Forecasting") as demo:
    gr.Markdown("""
    # FBMC Chronos-2 Zero-Shot Forecasting API

    **Flow-Based Market Coupling** electricity flow forecasting using Amazon Chronos-2.

    This Space provides GPU-accelerated zero-shot inference for cross-border electricity flows.
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Configuration")

            run_date_input = gr.Textbox(
                label="Run Date (YYYY-MM-DD)",
                value="2025-09-30",
                placeholder="2025-09-30",
                info="Date when forecast is made (data up to this date is historical)"
            )

            forecast_type_input = gr.Radio(
                choices=list(FORECAST_TYPES.keys()),
                value="smoke_test",
                label="Forecast Type",
                info="Smoke test: Quick validation (1 border, 7 days). Full: Production forecast (all borders, 14 days)"
            )

            submit_btn = gr.Button("Run Forecast", variant="primary")

        with gr.Column():
            gr.Markdown("### Results")

            output_file = gr.File(
                label="Download Forecast Results",
                type="filepath"
            )

            gr.Markdown("""
            **Output format**: Parquet file with columns:
            - `timestamp`: Hourly timestamps (D+1 to D+7 or D+14)
            - `{border}_median`: Median forecast (MW)
            - `{border}_q10`: 10th percentile (MW)
            - `{border}_q90`: 90th percentile (MW)

            **Inference environment**:
            - GPU: NVIDIA T4 (16GB VRAM)
            - Model: Chronos-T5-Large (710M parameters)
            - Precision: bfloat16
            """)

    # Wire up the interface
    submit_btn.click(
        fn=forecast_api,
        inputs=[run_date_input, forecast_type_input],
        outputs=output_file
    )

    gr.Markdown("""
    ---
    ### About

    **Zero-shot forecasting**: No model training required. The pre-trained Chronos-2 model
    generalizes directly to FBMC cross-border flows using historical patterns and future covariates.

    **Features**:
    - 2,553 engineered features (weather, CNEC constraints, load forecasts, LTA)
    - 24-month historical context (Oct 2023 - Oct 2025)
    - Time-aware extraction (prevents data leakage)
    - Probabilistic forecasts (10th/50th/90th percentiles)

    **Performance**:
    - Smoke test: ~30 seconds (1 border × 168 hours)
    - Full forecast: ~5 minutes (38 borders × 336 hours)

    **Project**: FBMC Flow Forecasting MVP | **Author**: Evgueni Poloukarov
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
notebooks/__marimo__/october_2024_evaluation.html ADDED
The diff for this file is too large to render. See raw diff.
scripts/compare_hourly_mae.py ADDED
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""
Compare hourly MAE: baseline vs hour-aware adaptive selection.

Loads both forecasts and compares MAE per hour-of-day to measure improvement.
"""

import polars as pl
import numpy as np
from pathlib import Path
from datetime import datetime

# Paths
PROJECT_ROOT = Path(__file__).parent.parent
BASELINE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_full_14day.parquet'
HOUR_AWARE_FORECAST = PROJECT_ROOT / 'results' / 'september_2025_forecast_hour_aware_ACTUAL.parquet'
BASELINE_SUMMARY = PROJECT_ROOT / 'results' / 'september_2025_hourly_summary.csv'
OUTPUT_PATH = PROJECT_ROOT / 'results' / 'hourly_mae_comparison.csv'

def load_actuals():
    """Load actuals from HuggingFace dataset."""
    print('[INFO] Loading actuals from HuggingFace dataset...')
    from datasets import load_dataset
    import os

    dataset = load_dataset('evgueni-p/fbmc-features-24month', split='train', token=os.environ.get('HF_TOKEN'))
    df_actuals_full = pl.from_arrow(dataset.data.table)

    # Filter to September 2-15, 2025
    forecast_start = datetime(2025, 9, 2)
    forecast_end = datetime(2025, 9, 16)

    df_actuals = df_actuals_full.filter(
        (pl.col('timestamp') >= forecast_start) &
        (pl.col('timestamp') < forecast_end)
    )

    print(f'[INFO] Actuals filtered: {df_actuals.shape[0]} hours')
    return df_actuals


def compute_hourly_mae(df_forecast, df_actuals, label):
    """Compute MAE per hour-of-day for all borders."""
    print(f'[INFO] Computing hourly MAE for {label}...')

    # Extract border names
    # For hour-aware, use _adaptive column; for baseline use _median
    if '_adaptive' in df_forecast.columns[0] or any(c.endswith('_adaptive') for c in df_forecast.columns):
        forecast_cols = [col for col in df_forecast.columns if col.endswith('_adaptive')]
        border_names = [col.replace('_adaptive', '') for col in forecast_cols]
        col_suffix = '_adaptive'
    else:
        forecast_cols = [col for col in df_forecast.columns if col.endswith('_median')]
        border_names = [col.replace('_median', '') for col in forecast_cols]
        col_suffix = '_median'

    print(f'[INFO] Using forecast column suffix: {col_suffix}')

    hourly_results = []

    for border in border_names:
        forecast_col = f'{border}{col_suffix}'
        actual_col = f'target_border_{border}'

        if actual_col not in df_actuals.columns:
            continue

        # Create unified dataframe
        df_border = df_forecast.select(['timestamp', forecast_col]).join(
            df_actuals.select(['timestamp', actual_col]),
            on='timestamp',
            how='inner'
        )

        # Add hour-of-day
        df_border = df_border.with_columns([
            pl.col('timestamp').dt.hour().alias('hour')
        ])

        # Compute MAE per hour
        for hour in range(24):
            hour_df = df_border.filter(pl.col('hour') == hour)

            if len(hour_df) == 0:
                continue

            mae = (hour_df[forecast_col] - hour_df[actual_col]).abs().mean()

            hourly_results.append({
                'border': border,
                'hour': hour,
                'mae': mae,
                'n_hours': len(hour_df),
                'version': label
            })

    return pl.DataFrame(hourly_results)


def compare_results(df_baseline_hourly, df_hour_aware_hourly):
    """Compare baseline vs hour-aware hourly MAE."""
    print('\n' + '='*80)
    print('HOURLY MAE COMPARISON: Baseline vs Hour-Aware Adaptive Selection')
    print('='*80)

    # Aggregate across borders for each version
    baseline_stats = df_baseline_hourly.group_by('hour').agg([
        pl.col('mae').mean().alias('baseline_mae'),
        pl.col('mae').median().alias('baseline_median_mae'),
        pl.col('border').count().alias('n_borders')
    ]).sort('hour')

    hour_aware_stats = df_hour_aware_hourly.group_by('hour').agg([
        pl.col('mae').mean().alias('hour_aware_mae'),
        pl.col('mae').median().alias('hour_aware_median_mae')
    ]).sort('hour')

    # Join for comparison
    comparison = baseline_stats.join(hour_aware_stats, on='hour', how='inner')

    # Calculate improvement
    comparison = comparison.with_columns([
        (pl.col('baseline_mae') - pl.col('hour_aware_mae')).alias('mae_reduction'),
        ((pl.col('baseline_mae') - pl.col('hour_aware_mae')) / pl.col('baseline_mae') * 100).alias('improvement_pct')
    ])

    print('\n[INFO] Hour-by-Hour Comparison:')
    print(comparison)

    # Overall statistics
    overall_baseline = df_baseline_hourly['mae'].mean()
    overall_hour_aware = df_hour_aware_hourly['mae'].mean()
    overall_improvement = (overall_baseline - overall_hour_aware) / overall_baseline * 100

    print(f'\n[INFO] Overall MAE:')
    print(f'  Baseline:   {overall_baseline:.2f} MW')
    print(f'  Hour-Aware: {overall_hour_aware:.2f} MW')
    print(f'  Improvement: {overall_improvement:.2f}%')

    # Problem hours analysis (15-21)
    problem_hours = [15, 16, 17, 18, 19, 20, 21]
    problem_baseline = comparison.filter(pl.col('hour').is_in(problem_hours))['baseline_mae'].mean()
    problem_hour_aware = comparison.filter(pl.col('hour').is_in(problem_hours))['hour_aware_mae'].mean()
    problem_improvement = (problem_baseline - problem_hour_aware) / problem_baseline * 100

    print(f'\n[INFO] Problem Hours (15-21) MAE:')
    print(f'  Baseline:   {problem_baseline:.2f} MW')
    print(f'  Hour-Aware: {problem_hour_aware:.2f} MW')
    print(f'  Improvement: {problem_improvement:.2f}%')

    # Best/worst hours
    print('\n[INFO] Top 5 Most Improved Hours:')
    best_improvements = comparison.sort('improvement_pct', descending=True).head(5)
    print(best_improvements.select(['hour', 'baseline_mae', 'hour_aware_mae', 'improvement_pct']))

    print('\n[INFO] Top 5 Least Improved (or Degraded) Hours:')
    worst_improvements = comparison.sort('improvement_pct').head(5)
    print(worst_improvements.select(['hour', 'baseline_mae', 'hour_aware_mae', 'improvement_pct']))

    # Success criteria check
    print('\n' + '='*80)
    if overall_improvement >= 5.0:
        print(f'[SUCCESS] Hour-aware selection achieved {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Proceed to Phase 4: AutoGluon fine-tuning with sample weighting')
    elif overall_improvement >= 3.0:
        print(f'[PARTIAL SUCCESS] {overall_improvement:.1f}% improvement - marginal gain')
        print('[RECOMMENDATION] Consider proceeding to fine-tuning, may provide larger gains')
    else:
        print(f'[INSUFFICIENT] Only {overall_improvement:.1f}% improvement (target: 5-10%)')
        print('[RECOMMENDATION] Skip to Phase 4: AutoGluon fine-tuning with sample weighting')
    print('='*80)

    return comparison


def main():
    """Main comparison workflow."""
    print('[START] Hourly MAE Comparison Analysis')
    print(f'[INFO] Baseline forecast: {BASELINE_FORECAST}')
    print(f'[INFO] Hour-aware forecast: {HOUR_AWARE_FORECAST}')

    # Load data
    df_actuals = load_actuals()

    print(f'\n[INFO] Loading baseline forecast...')
    df_baseline = pl.read_parquet(BASELINE_FORECAST)
    print(f'[INFO] Baseline shape: {df_baseline.shape}')

    print(f'\n[INFO] Loading hour-aware forecast...')
    df_hour_aware = pl.read_parquet(HOUR_AWARE_FORECAST)
    print(f'[INFO] Hour-aware shape: {df_hour_aware.shape}')

    # Compute hourly MAE for both
    df_baseline_hourly = compute_hourly_mae(df_baseline, df_actuals, 'baseline')
    df_hour_aware_hourly = compute_hourly_mae(df_hour_aware, df_actuals, 'hour_aware')

    # Compare results
    comparison = compare_results(df_baseline_hourly, df_hour_aware_hourly)

    # Save detailed comparison
    comparison.write_csv(OUTPUT_PATH)
    print(f'\n[INFO] Detailed comparison saved to: {OUTPUT_PATH}')

    print('\n[SUCCESS] Hourly MAE comparison complete!')


if __name__ == '__main__':
    main()
scripts/test_hf_space_context_expansion.py ADDED
@@ -0,0 +1,197 @@
#!/usr/bin/env python3
"""
Test HF Space with expanded context window (128h -> 2160h).
Validates VRAM usage and forecast variation patterns.
"""

import os
import sys
from pathlib import Path
import polars as pl
import numpy as np
from gradio_client import Client

# Get HF token from environment
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    print("[ERROR] HF_TOKEN environment variable not set")
    sys.exit(1)

def test_hf_space_smoke():
    """Run smoke test on HF Space and validate results"""
    print("=" * 80)
    print("HF SPACE SMOKE TEST: Context Window Expansion (128h -> 2160h)")
    print("=" * 80)

    # Initialize client
    print("\nConnecting to HF Space...")
    client = Client("evgueni-p/fbmc-chronos2", hf_token=HF_TOKEN)
    print("[OK] Connected to evgueni-p/fbmc-chronos2")

    # Test parameters
    run_date = "2024-09-30"
    test_border = "AT_DE"
    forecast_type = "smoke_test"  # 7 days, 1 border

    print(f"\nTest configuration:")
    print(f"  Border: {test_border}")
    print(f"  Run date: {run_date}")
    print(f"  Forecast type: {forecast_type}")
    print(f"  Expected context: 2160 hours (90 days)")
    print(f"  Expected batch_size: 48")

    # Run forecast
    print(f"\nRunning forecast via API...")
    try:
        result = client.predict(
            run_date_str=run_date,
            forecast_type=forecast_type,
            api_name="/forecast_api"
        )
        print(f"[OK] Forecast completed")
        print(f"  Result file: {result}")
    except Exception as e:
        print(f"[FAIL] API call failed: {e}")
        import traceback
        traceback.print_exc()
        return False

    # Download and validate forecast
    print(f"\nValidating forecast results...")

    if not os.path.exists(result):
        print(f"[FAIL] Forecast file not found: {result}")
        return False

    # Load forecast
    df = pl.read_parquet(result)
    print(f"[OK] Loaded forecast file")
    print(f"  Shape: {df.shape}")
    print(f"  Columns: {df.columns}")

    # Expected: 168 hours (7 days), 4 columns (timestamp + median + q10 + q90)
    expected_hours = 168
    if len(df) != expected_hours:
        print(f"[FAIL] Forecast length mismatch:")
        print(f"  Expected: {expected_hours} hours")
        print(f"  Got: {len(df)} hours")
        return False
    print(f"[OK] Forecast length: {len(df)} hours (correct)")

    # Extract median forecast for AT_DE
    median_col = f"{test_border}_median"
    if median_col not in df.columns:
        print(f"[FAIL] Column {median_col} not found in forecast")
        return False

    median_forecast = df[median_col].to_numpy()

    # Check variation statistics
    mean_val = np.mean(median_forecast)
    std_val = np.std(median_forecast)
    min_val = np.min(median_forecast)
    max_val = np.max(median_forecast)
    range_val = max_val - min_val

    print(f"\n[CHECK] Forecast statistics:")
    print(f"  Mean: {mean_val:.2f} MW")
    print(f"  Std Dev: {std_val:.2f} MW")
    print(f"  Min: {min_val:.2f} MW")
    print(f"  Max: {max_val:.2f} MW")
    print(f"  Range: {range_val:.2f} MW")

    # Validation 1: Check for variation
    if std_val < 1.0:
        print(f"\n[WARNING] Low variation detected (std={std_val:.4f} MW)")
        unique_values = len(np.unique(median_forecast))
        print(f"  Unique values in forecast: {unique_values}/{len(median_forecast)}")

        if unique_values < 5:
            print(f"\n[FAIL] Forecast appears constant (only {unique_values} unique values)")
            print(f"  First 24 values: {median_forecast[:24]}")
            return False
    else:
        print(f"\n[OK] Forecast shows variation (std={std_val:.2f} MW)")

    # Validation 2: Check unique values count
    unique_values = len(np.unique(median_forecast))
    print(f"\n[CHECK] Unique values: {unique_values}/{len(median_forecast)}")
    if unique_values < 50:
        print(f"[WARNING] Low diversity (expected >50 unique values)")
    else:
        print(f"[OK] Good diversity in forecast")

    # Validation 3: Check data type (should be integers now)
    if median_col in df.columns:
        dtype = df.schema[median_col]
        print(f"\n[CHECK] Data type: {dtype}")
        if "Int" in str(dtype):
            print(f"[OK] MW values converted to integers")
        else:
            print(f"[INFO] MW values still float (expected Int32)")

    # Display first 48 hours
    print(f"\n[CHECK] First 48 hours of median forecast:")
    for i in range(min(48, len(median_forecast))):
        if i % 12 == 0:
            print(f"  Hour {i:3d}-{i+11:3d}: ", end="")
        print(f"{median_forecast[i]:7.0f} ", end="")
        if (i + 1) % 12 == 0:
            print()
    print()

    # Summary
    print("\n" + "=" * 80)
    print("SMOKE TEST VALIDATION SUMMARY")
    print("=" * 80)

    checks_passed = []
    checks_failed = []

    # Check 1: Length
    if len(df) == expected_hours:
        checks_passed.append("Forecast length (168 hours)")
    else:
        checks_failed.append(f"Forecast length ({len(df)} != {expected_hours})")

    # Check 2: Variation
    if std_val >= 1.0:
        checks_passed.append(f"Variation (std={std_val:.2f} MW)")
    else:
        checks_failed.append(f"Low variation (std={std_val:.4f} MW)")

    # Check 3: Diversity
    if unique_values >= 50:
        checks_passed.append(f"Diversity ({unique_values} unique values)")
    else:
        checks_failed.append(f"Low diversity ({unique_values} unique values)")

    print(f"\n[PASSED] {len(checks_passed)} checks:")
    for check in checks_passed:
        print(f"  + {check}")

    if checks_failed:
        print(f"\n[FAILED] {len(checks_failed)} checks:")
        for check in checks_failed:
            print(f"  - {check}")

    # Overall result
    if len(checks_failed) == 0:
        print("\n" + "=" * 80)
        print("[SUCCESS] ALL CHECKS PASSED - Ready for full 38-border evaluation")
        print("=" * 80)
        print("\nNext steps:")
        print("1. Check HF Space logs for VRAM usage (should be ~76% = 36.6 GB / 48 GB)")
        print("2. Run full 38-border evaluation")
        print("3. Compare to Session 12 baseline (15.92 MW D+1 MAE)")
        return True
    else:
        print("\n" + "=" * 80)
        print("[PARTIAL SUCCESS] Some checks failed - investigate before full evaluation")
        print("=" * 80)
        return False


if __name__ == "__main__":
    success = test_hf_space_smoke()
    sys.exit(0 if success else 1)
src/forecasting/chronos_inference.py CHANGED
@@ -289,123 +289,8 @@ class ChronosInferencePipeline:
         print(f"Total time: {results['metadata']['total_time_s']:.1f}s")
         print(f"Successful: {results['metadata']['successful_borders']}/{len(forecast_borders)} borders")
 
-        # Apply adaptive quantile selection based on learned uncertainty
-        print(f"\n[ADAPTIVE SELECTION] Computing adaptive forecasts based on quantile spread...")
-        results = self._apply_adaptive_selection(results, run_datetime, prediction_hours)
-        print(f"[OK] Adaptive selection complete")
-
         return results
 
-    def _apply_adaptive_selection(self, results: Dict, run_datetime: datetime, prediction_hours: int) -> Dict:
-        """
-        Apply HOUR-AWARE adaptive quantile selection based on model's LEARNED uncertainty.
-
-        This method uses quantile spread (q90-q10) as the model's learned volatility signal,
-        but applies DIFFERENT thresholds for different hours based on electricity market patterns.
-
-        Key insight: Ramping hours (7-9, 17-21) naturally have higher volatility, so we need
-        higher thresholds to avoid false positives. Night hours should be more conservative.
-
-        Args:
-            results: Forecast results dictionary from run_forecast()
-            run_datetime: Forecast run date/time
-            prediction_hours: Number of hours in forecast horizon
-
-        Returns:
-            Updated results dictionary with 'adaptive' forecast added to each border
-        """
-        # Generate forecast timestamps (start next day at midnight)
-        forecast_start = run_datetime + timedelta(days=1)
-        forecast_timestamps = [forecast_start + timedelta(hours=h) for h in range(prediction_hours)]
-
-        # Extract hour-of-day for each timestamp
-        hours_of_day = np.array([ts.hour for ts in forecast_timestamps])
-
-        # Define hour-specific uncertainty thresholds based on electricity market patterns
-        # From hourly MAE analysis: worst hours are 19 (578 MW), 15 (564 MW), 20 (550 MW)
-        hourly_thresholds = {
-            # Morning ramp (5-9): Higher threshold (0.45-0.50) → expect natural volatility
-            5: 0.45, 6: 0.45, 7: 0.50, 8: 0.50, 9: 0.45,
-
-            # Mid-day stable (10-16): Standard threshold (0.30-0.35)
-            10: 0.30, 11: 0.30, 12: 0.30, 13: 0.30, 14: 0.30, 15: 0.35, 16: 0.35,
-
-            # Evening ramp (17-21): Higher threshold (0.45-0.50) → worst observed hours
-            17: 0.45, 18: 0.50, 19: 0.50, 20: 0.50, 21: 0.45,
-
-            # Night stable (22-4): Lower threshold (0.25) → expect precision
-            22: 0.25, 23: 0.25, 0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25, 4: 0.30
-        }
-
-        for border, data in results['borders'].items():
-            if 'error' in data:
-                continue  # Skip failed borders
-
-            # Extract quantiles as numpy arrays for vectorized operations
-            q10_array = np.array(data['q10'])
-            q90_array = np.array(data['q90'])
-            median_array = np.array(data['median'])
-            q75_array = np.array(data['q75'])
-            q25_array = np.array(data['q25'])
-
-            # Calculate quantile spread (model's learned uncertainty estimate)
-            # This captures WHEN the model predicts volatility based on input features
-            spread = q90_array - q10_array
-
-            # Normalize spread as percentage of median (handles different border capacities)
-            # Add +1 to avoid division by zero for near-zero medians
-            uncertainty_pct = spread / (np.abs(median_array) + 1.0)
-
-            # HOUR-AWARE adaptive selection using hour-specific thresholds
-            adaptive_forecast = np.zeros_like(median_array, dtype=float)
-
-            for i, hour in enumerate(hours_of_day):
-                # Get threshold for this hour (default to 0.30 if hour not in map)
-                threshold_high = hourly_thresholds.get(hour, 0.30)
-                threshold_medium = threshold_high * 0.5  # Medium threshold is 50% of high
-
-                if uncertainty_pct[i] > threshold_high:
-                    # High uncertainty: use q75
-                    adaptive_forecast[i] = q75_array[i]
-                elif uncertainty_pct[i] >= threshold_medium:
-                    # Medium uncertainty: interpolate q60 between median and q75
-                    adaptive_forecast[i] = 0.6 * median_array[i] + 0.4 * q75_array[i]
-                else:
-                    # Low uncertainty: use median
-                    adaptive_forecast[i] = median_array[i]
-
-            # Round to integers (capacity values are always whole MW)
-            adaptive_forecast = np.round(adaptive_forecast).astype(int)
-
-            # Store adaptive forecast and uncertainty metadata
-            data['adaptive'] = adaptive_forecast.tolist()
-            data['uncertainty_pct'] = uncertainty_pct.tolist()
-
-            # Store selection statistics for analysis (using hour-aware thresholds)
-            high_uncertainty_hours = 0
-            medium_uncertainty_hours = 0
-            low_uncertainty_hours = 0
-
-            for i, hour in enumerate(hours_of_day):
-                threshold_high = hourly_thresholds.get(hour, 0.30)
-                threshold_medium = threshold_high * 0.5
-
-                if uncertainty_pct[i] > threshold_high:
-                    high_uncertainty_hours += 1
-                elif uncertainty_pct[i] >= threshold_medium:
-                    medium_uncertainty_hours += 1
-                else:
-                    low_uncertainty_hours += 1
-
-            data['adaptive_stats'] = {
-                'high_uncertainty_hours': int(high_uncertainty_hours),
-                'medium_uncertainty_hours': int(medium_uncertainty_hours),
-                'low_uncertainty_hours': int(low_uncertainty_hours),
-                'mean_uncertainty_pct': float(np.mean(uncertainty_pct)),
-                'max_uncertainty_pct': float(np.max(uncertainty_pct))
-            }
-
-        return results
 
     def export_to_parquet(self, results: Dict, output_path: str):
         """
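
After the revert, selection reduces to taking q50 everywhere. A minimal sketch of the post-revert behaviour (illustrative only; the `results` structure follows the removed code above, and `point_forecast` is a hypothetical key, not from the repo):

def select_point_forecast(results: dict) -> dict:
    """Baseline selection after the revert: always the median (q50), every hour."""
    for border, data in results["borders"].items():
        if "error" in data:
            continue  # skip failed borders, as the removed method did
        data["point_forecast"] = list(data["median"])  # hypothetical key, for illustration
    return results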
temp_analysis.txt ADDED
@@ -0,0 +1,57 @@
=== DATA STRUCTURE ===
Shape: (17544, 199)
Directional columns (e.g., CZ>PL): 132
Border_ columns (e.g., border_CZ_PL): 38

=== SAMPLE VALUES ===
shape: (5, 5)
┌────────────────────────────────┬────────┬────────┬──────────────┬──────────────┐
│ mtu                            ┆ CZ>PL  ┆ PL>CZ  ┆ border_CZ_PL ┆ border_PL_CZ │
│ ---                            ┆ ---    ┆ ---    ┆ ---          ┆ ---          │
│ datetime[ns, Europe/Amsterdam] ┆ f64    ┆ f64    ┆ i64          ┆ i64          │
╞════════════════════════════════╪════════╪════════╪══════════════╪══════════════╡
│ 2023-10-01 02:00:00 CEST       ┆ 2785.0 ┆ 3883.0 ┆ 0            ┆ 0            │
│ 2023-10-01 03:00:00 CEST       ┆ 2711.0 ┆ 3775.0 ┆ 0            ┆ 0            │
│ 2023-10-01 04:00:00 CEST       ┆ 2831.0 ┆ 3787.0 ┆ 0            ┆ 0            │
│ 2023-10-01 05:00:00 CEST       ┆ 2778.0 ┆ 3361.0 ┆ 0            ┆ 0            │
│ 2023-10-01 06:00:00 CEST       ┆ 2744.0 ┆ 3057.0 ┆ 0            ┆ 0            │
└────────────────────────────────┴────────┴────────┴──────────────┴──────────────┘

=== STATISTICS ===
shape: (1, 4)
┌─────────────┬─────────────┬───────────────────┬───────────────────┐
│ CZ>PL_mean  ┆ PL>CZ_mean  ┆ border_CZ_PL_mean ┆ border_PL_CZ_mean │
│ ---         ┆ ---         ┆ ---               ┆ ---               │
│ f64         ┆ f64         ┆ f64               ┆ f64               │
╞═════════════╪═════════════╪═══════════════════╪═══════════════════╡
│ 3481.789045 ┆ 2697.566404 ┆ 0.0               ┆ 9.573358          │
└─────────────┴─────────────┴───────────────────┴───────────────────┘

=== ARE THEY THE SAME? ===
shape: (1, 2)
┌───────────────────────┬───────────────────────┐
│ CZ>PL == border_CZ_PL ┆ PL>CZ == border_PL_CZ │
│ ---                   ┆ ---                   │
│ bool                  ┆ bool                  │
╞═══════════════════════╪═══════════════════════╡
│ false                 ┆ false                 │
└───────────────────────┴───────────────────────┘

=== CHECKING IF BORDER COLUMNS ARE MAX OF BOTH DIRECTIONS ===
shape: (10, 4)
┌─────────────────────────────────┬────────┬────────┬──────────────┐
│ border_CZ_PL == max(CZ>PL, PL>… ┆ CZ>PL  ┆ PL>CZ  ┆ border_CZ_PL │
│ ---                             ┆ ---    ┆ ---    ┆ ---          │
│ bool                            ┆ f64    ┆ f64    ┆ i64          │
╞═════════════════════════════════╪════════╪════════╪══════════════╡
│ false                           ┆ 2785.0 ┆ 3883.0 ┆ 0            │
│ false                           ┆ 2711.0 ┆ 3775.0 ┆ 0            │
│ false                           ┆ 2831.0 ┆ 3787.0 ┆ 0            │
│ false                           ┆ 2778.0 ┆ 3361.0 ┆ 0            │
│ false                           ┆ 2744.0 ┆ 3057.0 ┆ 0            │
│ false                           ┆ 2838.0 ┆ 2574.0 ┆ 0            │
│ false                           ┆ 2941.0 ┆ 2660.0 ┆ 0            │
│ false                           ┆ 3364.0 ┆ 2545.0 ┆ 0            │
│ false                           ┆ 3762.0 ┆ 2438.0 ┆ 0            │
│ false                           ┆ 3731.0 ┆ 3120.0 ┆ 0            │
└─────────────────────────────────┴────────┴────────┴──────────────┘
temp_final_summary.txt ADDED
@@ -0,0 +1,148 @@
================================================================================
JAO DATA STRUCTURE VERIFICATION - FINAL REPORT
================================================================================

QUESTION: What should be the forecast target for "max capacity in a given direction"?

================================================================================
1. JAO DATA TYPES IDENTIFIED
================================================================================

A. DIRECTIONAL FLOW COLUMNS (CZ>PL, PL>CZ format)
   - Total: 132 columns (12 x 11 bidirectional combinations)
   - Source: MaxBEX dataset from JAO
   - Represents: Maximum Bilateral Exchange Capacity (hub-to-hub)
   - Type: Commercial trading capacity (MW)
   - Includes: ALL zone pairs (physical + virtual borders)

Example values for CZ<->PL:
shape: (5, 3)
┌────────────────────────────────┬────────┬────────┐
│ mtu                            ┆ CZ>PL  ┆ PL>CZ  │
│ ---                            ┆ ---    ┆ ---    │
│ datetime[ns, Europe/Amsterdam] ┆ f64    ┆ f64    │
╞════════════════════════════════╪════════╪════════╡
│ 2023-10-01 02:00:00 CEST       ┆ 2785.0 ┆ 3883.0 │
│ 2023-10-01 03:00:00 CEST       ┆ 2711.0 ┆ 3775.0 │
│ 2023-10-01 04:00:00 CEST       ┆ 2831.0 ┆ 3787.0 │
│ 2023-10-01 05:00:00 CEST       ┆ 2778.0 ┆ 3361.0 │
│ 2023-10-01 06:00:00 CEST       ┆ 2744.0 ┆ 3057.0 │
└────────────────────────────────┴────────┴────────┘

Statistics (CZ<->PL):
shape: (1, 6)
┌───────────────┬───────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
│ CZ>PL_mean_MW ┆ PL>CZ_mean_MW ┆ CZ>PL_min_MW ┆ PL>CZ_min_MW ┆ CZ>PL_max_MW ┆ PL>CZ_max_MW │
│ ---           ┆ ---           ┆ ---          ┆ ---          ┆ ---          ┆ ---          │
│ f64           ┆ f64           ┆ f64          ┆ f64          ┆ f64          ┆ f64          │
╞═══════════════╪═══════════════╪══════════════╪══════════════╪══════════════╪══════════════╡
│ 3481.789045   ┆ 2697.566404   ┆ 144.0        ┆ 0.0          ┆ 5699.0       ┆ 4631.0       │
└───────────────┴───────────────┴──────────────┴──────────────┴──────────────┴──────────────┘

B. BORDER COLUMNS (border_CZ_PL format)
   - Total: 38 columns
   - Source: LTA (Long-Term Allocations) dataset from JAO
   - Represents: Pre-allocated capacity from long-term contracts (MW)
   - Type: Allocated capacity (reduces available MaxBEX)
   - Includes: ONLY physical borders with direct interconnectors

Example values for CZ-PL border:
shape: (5, 3)
┌────────────────────────────────┬──────────────┬──────────────┐
│ mtu                            ┆ border_CZ_PL ┆ border_PL_CZ │
│ ---                            ┆ ---          ┆ ---          │
│ datetime[ns, Europe/Amsterdam] ┆ i64          ┆ i64          │
╞════════════════════════════════╪══════════════╪══════════════╡
│ 2023-10-01 02:00:00 CEST       ┆ 0            ┆ 0            │
│ 2023-10-01 03:00:00 CEST       ┆ 0            ┆ 0            │
│ 2023-10-01 04:00:00 CEST       ┆ 0            ┆ 0            │
│ 2023-10-01 05:00:00 CEST       ┆ 0            ┆ 0            │
│ 2023-10-01 06:00:00 CEST       ┆ 0            ┆ 0            │
└────────────────────────────────┴──────────────┴──────────────┘

Statistics (CZ-PL border):
shape: (1, 4)
┌──────────────────────┬──────────────────────┬───────────────────────┬───────────────────────┐
│ border_CZ_PL_mean_MW ┆ border_PL_CZ_mean_MW ┆ border_CZ_PL_total_MW ┆ border_PL_CZ_total_MW │
│ ---                  ┆ ---                  ┆ ---                   ┆ ---                   │
│ f64                  ┆ f64                  ┆ i64                   ┆ i64                   │
╞══════════════════════╪══════════════════════╪═══════════════════════╪═══════════════════════╡
│ 0.0                  ┆ 9.573358             ┆ 0                     ┆ 167955                │
└──────────────────────┴──────────────────────┴───────────────────────┴───────────────────────┘

================================================================================
2. KEY DIFFERENCES
================================================================================

DIRECTIONAL COLUMNS (CZ>PL):
- MaxBEX = Commercial trading capacity in specific direction
- CZ>PL != PL>CZ (asymmetric, depends on network constraints)
- Avg CZ>PL: 3,482 MW vs Avg PL>CZ: 2,698 MW (significant difference!)
- Calculated by JAO optimization considering ALL network constraints
- THIS IS THE FORECAST TARGET!

BORDER COLUMNS (border_CZ_PL):
- LTA = Long-term allocated capacity (pre-sold)
- Only exists for 38 physical borders (not all 132 zone pairs)
- Much smaller values (avg border_CZ_PL: 0 MW, border_PL_CZ: 9.6 MW)
- Acts as INPUT/CONSTRAINT to MaxBEX calculation
- NOT a capacity forecast target

================================================================================
3. RELATIONSHIP BETWEEN MaxBEX AND LTA
================================================================================

From JAO documentation:
MaxBEX (available capacity) = Optimized capacity - LTA allocations

LTA reduces available MaxBEX because capacity is pre-sold in:
- Yearly auctions
- Monthly auctions
- Other long-term contracts

================================================================================
4. VERIFICATION: PHYSICAL vs VIRTUAL BORDERS
================================================================================

Physical borders (with LTA): 38
Total MaxBEX pairs: 132 (12 x 11)
Virtual borders: 94 (zone pairs without physical interconnectors)

================================================================================
5. FINAL ANSWER
================================================================================

TARGET FOR FORECASTING "Max Capacity in a Given Direction":

USE: Directional columns (CZ>PL, PL>CZ, DE>FR, etc.)
- These are MaxBEX values = commercial trading capacity
- Represents actual available capacity in that specific direction
- Accounts for network constraints, LTA allocations, and physics
- 132 total targets (all zone-pair combinations)

DO NOT USE: border_ columns (border_CZ_PL, border_PL_CZ, etc.)
- These are LTA values = pre-allocated capacity
- Should be used as INPUT FEATURES (future covariates)
- Only 38 physical borders (incomplete coverage)
- Much smaller values (often near zero)

================================================================================
6. CURRENT IMPLEMENTATION STATUS
================================================================================

[OK] The change from border_* to directional columns was CORRECT!

Before: Using border_CZ_PL (LTA allocations) as targets
- WRONG: Forecasting pre-allocated capacity (not meaningful)
- Only 38 borders covered
- Very low values (mostly zeros)

After: Using CZ>PL directional columns (MaxBEX) as targets
- CORRECT: Forecasting commercial trading capacity
- All 132 zone pairs covered
- Represents actual "max capacity in given direction"
- Values match expected capacity ranges (hundreds to thousands of MW)

================================================================================
END OF REPORT
================================================================================
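
The report's final answer maps directly onto column selection. A minimal polars sketch (illustrative only; the tiny frame stands in for the real joined features data, column names follow the listings in this commit):

import polars as pl

# Stand-in frame; the real data has 132 directional and 38 border_ columns
df = pl.DataFrame({
    "CZ>PL": [2785.0],     # MaxBEX: directional trading capacity -> forecast target
    "PL>CZ": [3883.0],
    "border_CZ_PL": [0],   # LTA: pre-allocated capacity -> future covariate
})

target_cols = [c for c in df.columns if ">" in c]                    # forecast targets
covariate_cols = [c for c in df.columns if c.startswith("border_")]  # future covariates
print(target_cols)     # ['CZ>PL', 'PL>CZ']
print(covariate_cols)  # ['border_CZ_PL']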
temp_lta_analysis.txt ADDED
@@ -0,0 +1,59 @@
=== LTA DATA STRUCTURE ===
Shape: (16834, 41)
Columns: ['mtu', 'border_AT_CZ', 'border_AT_HU', 'border_AT_SI', 'border_BE_DE', 'border_CZ_AT', 'border_CZ_DE', 'border_CZ_PL', 'border_CZ_SK', 'border_DE_BE', 'border_DE_CZ', 'border_DE_PL', 'border_HU_AT', 'border_HU_SI', 'border_HU_SK', 'border_HU_HR', 'border_HU_RO', 'border_HR_HU', 'border_HR_SI', 'border_PL_CZ', 'border_PL_DE', 'border_PL_SK', 'border_RO_HU', 'border_SI_AT', 'border_SI_HR', 'border_SI_HU', 'border_SK_CZ', 'border_SK_HU', 'border_SK_PL', 'border_AT_DE', 'border_BE_NL', 'border_BE_FR', 'border_DE_AT', 'border_DE_FR', 'border_DE_NL', 'border_FR_BE', 'border_FR_DE', 'border_NL_BE', 'border_NL_DE', 'is_masked', 'masking_method']

=== LTA SAMPLE DATA ===
shape: (10, 41)
┌────────────────────────────────┬──────────────┬──────────────┬──────────────┬───┬──────────────┬──────────────┬───────────┬────────────────┐
│ mtu                            ┆ border_AT_CZ ┆ border_AT_HU ┆ border_AT_SI ┆ … ┆ border_NL_BE ┆ border_NL_DE ┆ is_masked ┆ masking_method │
│ ---                            ┆ ---          ┆ ---          ┆ ---          ┆   ┆ ---          ┆ ---          ┆ ---       ┆ ---            │
│ datetime[ns, Europe/Amsterdam] ┆ i64          ┆ i64          ┆ i64          ┆   ┆ i64          ┆ i64          ┆ bool      ┆ str            │
╞════════════════════════════════╪══════════════╪══════════════╪══════════════╪═══╪══════════════╪══════════════╪═══════════╪════════════════╡
│ 2023-10-01 02:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 03:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 04:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 05:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 06:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 07:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 08:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 09:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 10:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
│ 2023-10-01 11:00:00 CEST       ┆ 350          ┆ 400          ┆ 600          ┆ … ┆ 619          ┆ 1081         ┆ false     ┆ null           │
└────────────────────────────────┴──────────────┴──────────────┴──────────────┴───┴──────────────┴──────────────┴───────────┴────────────────┘

=== COLUMN NAMES IN LTA ===
Found 38 border_ columns
['border_AT_CZ', 'border_AT_HU', 'border_AT_SI', 'border_BE_DE', 'border_CZ_AT', 'border_CZ_DE', 'border_CZ_PL', 'border_CZ_SK', 'border_DE_BE', 'border_DE_CZ', 'border_DE_PL', 'border_HU_AT', 'border_HU_SI', 'border_HU_SK', 'border_HU_HR', 'border_HU_RO', 'border_HR_HU', 'border_HR_SI', 'border_PL_CZ', 'border_PL_DE', 'border_PL_SK', 'border_RO_HU', 'border_SI_AT', 'border_SI_HR', 'border_SI_HU', 'border_SK_CZ', 'border_SK_HU', 'border_SK_PL', 'border_AT_DE', 'border_BE_NL', 'border_BE_FR', 'border_DE_AT', 'border_DE_FR', 'border_DE_NL', 'border_FR_BE', 'border_FR_DE', 'border_NL_BE', 'border_NL_DE']
temp_raw_analysis.txt ADDED
@@ -0,0 +1,5 @@
=== RAW JAO MAXBEX DATA ===
Shape: (18696, 132)
Columns: ['AT>BE', 'AT>CZ', 'AT>DE', 'AT>FR', 'AT>HR', 'AT>HU', 'AT>NL', 'AT>PL', 'AT>RO', 'AT>SI', 'AT>SK', 'BE>AT', 'BE>CZ', 'BE>DE', 'BE>FR', 'BE>HR', 'BE>HU', 'BE>NL', 'BE>PL', 'BE>RO', 'BE>SI', 'BE>SK', 'CZ>AT', 'CZ>BE', 'CZ>DE', 'CZ>FR', 'CZ>HR', 'CZ>HU', 'CZ>NL', 'CZ>PL', 'CZ>RO', 'CZ>SI', 'CZ>SK', 'DE>AT', 'DE>BE', 'DE>CZ', 'DE>FR', 'DE>HR', 'DE>HU', 'DE>NL', 'DE>PL', 'DE>RO', 'DE>SI', 'DE>SK', 'FR>AT', 'FR>BE', 'FR>CZ', 'FR>DE', 'FR>HR', 'FR>HU', 'FR>NL', 'FR>PL', 'FR>RO', 'FR>SI', 'FR>SK', 'HR>AT', 'HR>BE', 'HR>CZ', 'HR>DE', 'HR>FR', 'HR>HU', 'HR>NL', 'HR>PL', 'HR>RO', 'HR>SI', 'HR>SK', 'HU>AT', 'HU>BE', 'HU>CZ', 'HU>DE', 'HU>FR', 'HU>HR', 'HU>NL', 'HU>PL', 'HU>RO', 'HU>SI', 'HU>SK', 'NL>AT', 'NL>BE', 'NL>CZ', 'NL>DE', 'NL>FR', 'NL>HR', 'NL>HU', 'NL>PL', 'NL>RO', 'NL>SI', 'NL>SK', 'PL>AT', 'PL>BE', 'PL>CZ', 'PL>DE', 'PL>FR', 'PL>HR', 'PL>HU', 'PL>NL', 'PL>RO', 'PL>SI', 'PL>SK', 'RO>AT', 'RO>BE', 'RO>CZ', 'RO>DE', 'RO>FR', 'RO>HR', 'RO>HU', 'RO>NL', 'RO>PL', 'RO>SI', 'RO>SK', 'SI>AT', 'SI>BE', 'SI>CZ', 'SI>DE', 'SI>FR', 'SI>HR', 'SI>HU', 'SI>NL', 'SI>PL', 'SI>RO', 'SI>SK', 'SK>AT', 'SK>BE', 'SK>CZ', 'SK>DE', 'SK>FR', 'SK>HR', 'SK>HU', 'SK>NL', 'SK>PL', 'SK>RO', 'SK>SI']

=== SAMPLE RAW DATA ===