#!/usr/bin/env python3
"""
Gradio Interface for Dynamic Forecast System
Interactive interface for time-aware forecasting with run date selection.
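
Example (programmatic use; a minimal sketch, with "AT_DE" as an illustrative border name):

    from gradio_app import load_data, prepare_forecast

    load_data()
    result, context_preview, future_preview = prepare_forecast(
        "2025-08-15 23:00:00", "AT_DE"
    )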
"""
import os
import gradio as gr
import polars as pl
import pandas as pd
from datetime import datetime, timedelta
from datasets import load_dataset
from src.forecasting.dynamic_forecast import DynamicForecast
from src.forecasting.feature_availability import FeatureAvailability
# Global variables for caching
dataset = None
forecaster = None
borders = None
def load_data():
"""Load dataset once at startup."""
global dataset, forecaster, borders
print("[*] Loading dataset from HuggingFace...")
# Load HF token from environment variable
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
raise ValueError(
"HF_TOKEN not found in environment variables. "
"Please set HF_TOKEN in your environment or .env file."
)
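# The token can be exported in the shell (e.g. `export HF_TOKEN=...`) or kept in a
# .env file; note that a .env file is only picked up if something loads it (for
# example python-dotenv) before this module runs.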
ds = load_dataset(
"evgueni-p/fbmc-features-24month",
split="train",
token=hf_token
)
dataset = pl.from_pandas(ds.to_pandas())
# Ensure timestamp is datetime
if dataset['timestamp'].dtype == pl.String:
dataset = dataset.with_columns(pl.col('timestamp').str.to_datetime())
elif dataset['timestamp'].dtype != pl.Datetime:
dataset = dataset.with_columns(pl.col('timestamp').cast(pl.Datetime))
# Initialize forecaster
forecaster = DynamicForecast(
dataset=dataset,
context_hours=512,
forecast_hours=336 # Fixed at 14 days
)
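# 512 hours of context is roughly 21 days of history; 336 hours is exactly 14 days
# of hourly forecasts (D+1 through D+14).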
# Extract borders
target_cols = [col for col in dataset.columns if col.startswith('target_border_')]
borders = [col.replace('target_border_', '') for col in target_cols]
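# Example: a column named 'target_border_AT_DE' (illustrative name) yields the border 'AT_DE'.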
print(f"[OK] Loaded {len(dataset)} rows, {len(dataset.columns)} columns")
print(f"[OK] Found {len(borders)} borders")
print(f"[OK] Date range: {dataset['timestamp'].min()} to {dataset['timestamp'].max()}")
return True
def get_dataset_info():
"""Get dataset information for display."""
if dataset is None:
return "Dataset not loaded"
date_min = str(dataset['timestamp'].min())
date_max = str(dataset['timestamp'].max())
info = f"""
**Dataset Information**
- Total rows: {len(dataset):,}
- Total columns: {len(dataset.columns)}
- Date range: {date_min} to {date_max}
- Borders available: {len(borders)}
"""
return info
def get_feature_summary():
"""Get feature categorization summary."""
if forecaster is None:
return "Forecaster not initialized"
summary = forecaster.get_feature_summary()
text = f"""
**Feature Categorization**
- Full-horizon D+14: {summary['full_horizon_d14']} features
(temporal, weather, CNEC outages, LTA)
- Partial D+1: {summary['partial_d1']} features
(load forecasts, masked D+2-D+14)
- Historical only: {summary['historical']} features
(prices, generation, demand, lags, etc.)
- **Total: {summary['total']} features**
"""
return text
def validate_run_date(run_date_str):
"""Validate run date is within dataset bounds."""
if not run_date_str:
return False, "Please select a run date"
try:
run_date = datetime.strptime(run_date_str, "%Y-%m-%d %H:%M:%S")
except ValueError:
return False, "Invalid date format (use YYYY-MM-DD HH:MM:SS)"
dataset_min = dataset['timestamp'].min()
dataset_max = dataset['timestamp'].max()
# Run date must have 512 hours of context before it
min_valid = dataset_min + timedelta(hours=512)
# Run date must have 336 hours of future data after it
max_valid = dataset_max - timedelta(hours=336)
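# Together these bounds give the valid window shown in the UI: roughly 22 days after
# the dataset start up to 14 days before the dataset end.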
if run_date < min_valid:
return False, f"Run date too early (need 512h context). Minimum: {min_valid}"
if run_date > max_valid:
return False, f"Run date too late (need 336h future data). Maximum: {max_valid}"
return True, "Run date valid"
def prepare_forecast(run_date_str, border):
"""Prepare forecast data for selected run date and border."""
if dataset is None or forecaster is None:
return "Error: Dataset not loaded", "", ""
# Validate inputs
if not border:
return "Error: Please select a border", "", ""
is_valid, msg = validate_run_date(run_date_str)
if not is_valid:
return f"Error: {msg}", "", ""
try:
run_date = datetime.strptime(run_date_str, "%Y-%m-%d %H:%M:%S")
# Prepare data
context_data, future_data = forecaster.prepare_forecast_data(run_date, border)
# Validate no leakage
is_valid, errors = forecaster.validate_no_leakage(
context_data, future_data, run_date
)
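# Checks performed (see the About tab): context timestamps < run_date, future
# timestamps >= run_date + 1 hour, no overlap between the two, and only future
# covariates present in future_data.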
if not is_valid:
error_msg = "Data leakage detected:\n" + "\n".join(f"- {e}" for e in errors)
return error_msg, "", ""
# Build result summary
forecast_start = run_date + timedelta(hours=1)
forecast_end = forecast_start + timedelta(hours=335)
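# ET convention: the first forecast hour (D+1, hour 1) begins one hour after
# run_date, and the 336th and final hour begins 335 hours after that.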
result = f"""
**Forecast Configuration**
- Border: {border}
- Run date: {run_date}
- Forecast horizon: D+1 to D+14 (336 hours, FIXED)
- Forecast period: {forecast_start} to {forecast_end}
**Data Preparation Summary**
- Context shape: {context_data.shape} (historical data)
- Future shape: {future_data.shape} (future covariates)
- Context dates: {context_data['timestamp'].min()} to {context_data['timestamp'].max()}
- Future dates: {future_data['timestamp'].min()} to {future_data['timestamp'].max()}
- Leakage validation: PASSED
**Feature Availability**
- Full-horizon D+14: Available for all 336 hours
- Partial D+1 (load forecasts): Available for the first 24 hours, masked for hours 25-336
- Historical features: Not used for forecasting (context only)
**Next Steps**
1. Data has been prepared with time-aware extraction
2. Load forecast masking applied (D+1 only)
3. LTA forward-filling applied (constant across horizon)
4. Ready for Chronos-2 inference (requires GPU)
**Note**: This is a dry-run demonstration. Actual inference requires a GPU and the Chronos-2 model.
"""
# Create context preview
context_preview = context_data.head(10).to_string()
# Create future preview
future_preview = future_data.head(10).to_string()
return result, context_preview, future_preview
except Exception as e:
return f"Error: {str(e)}", "", ""
def create_interface():
"""Create Gradio interface."""
# Load data at startup
load_data()
with gr.Blocks(title="FBMC Dynamic Forecast System") as app:
gr.Markdown("# FBMC Dynamic Forecast System")
gr.Markdown("""
**Time-Aware Forecasting with Run Date Selection**
This interface demonstrates the dynamic forecast pipeline that prevents data leakage
by using only data available at the selected run date.
**Key Features**:
- Dynamic run date selection (prevents data leakage)
- Fixed 14-day forecast horizon (D+1 to D+14, always 336 hours)
- Time-aware feature categorization (603 full + 12 partial + 1,899 historical)
- Availability masking for partial features (load forecasts D+1 only)
- Built-in leakage validation
""")
with gr.Tab("Forecast Configuration"):
with gr.Row():
with gr.Column():
gr.Markdown("### Dataset Information")
dataset_info = gr.Textbox(
label="Dataset Info",
value=get_dataset_info(),
lines=8,
interactive=False
)
feature_summary = gr.Textbox(
label="Feature Summary",
value=get_feature_summary(),
lines=10,
interactive=False
)
with gr.Column():
gr.Markdown("### Forecast Configuration")
run_date_input = gr.Textbox(
label="Run Date (YYYY-MM-DD HH:MM:SS)",
placeholder="2025-08-15 23:00:00",
value="2025-08-15 23:00:00"
)
border_dropdown = gr.Dropdown(
label="Border",
choices=borders if borders else [],
value=borders[0] if borders else None
)
gr.Markdown("""
**Forecast Horizon**: Fixed at 14 days (D+1 to D+14, 336 hours)
**Validation Rules**:
- Run date must have 512 hours of historical context
- Run date must have 336 hours of future data (for this demo)
- Valid range: ~22 days from dataset start to ~14 days before dataset end
""")
prepare_btn = gr.Button("Prepare Forecast Data", variant="primary")
with gr.Row():
result_output = gr.Textbox(
label="Forecast Preparation Result",
lines=25,
interactive=False
)
with gr.Tab("Data Preview"):
with gr.Row():
context_preview = gr.Textbox(
label="Context Data (first 10 rows)",
lines=20,
interactive=False
)
future_preview = gr.Textbox(
label="Future Covariates (first 10 rows)",
lines=20,
interactive=False
)
with gr.Tab("About"):
gr.Markdown("""
## About This System
### Purpose
Prevent data leakage in FBMC cross-border flow forecasting by implementing
time-aware data extraction that respects feature availability windows.
### Architecture
1. **Feature Categorization**: All 2,514 features categorized by availability
- Full-horizon D+14: 603 features (temporal, weather, outages, LTA)
- Partial D+1: 12 features (load forecasts, masked D+2-D+14)
- Historical: 1,899 features (prices, generation, demand, lags)
2. **Time-Aware Extraction**: DynamicForecast class (see the sketch after this list)
- Extracts context data (all data before run_date)
- Extracts future covariates (D+1 to D+14 only)
- Applies availability masking for partial features
3. **Leakage Validation**: Built-in checks
- Context timestamps < run_date
- Future timestamps >= run_date + 1 hour
- No overlap between context and future
- Only future covariates in future data
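A minimal sketch of steps 2-3, assuming a Polars `dataset` is already loaded (the border name "AT_DE" is illustrative):
```python
from datetime import datetime
from src.forecasting.dynamic_forecast import DynamicForecast

forecaster = DynamicForecast(dataset=dataset, context_hours=512, forecast_hours=336)
run_date = datetime(2025, 8, 15, 23, 0, 0)
context_data, future_data = forecaster.prepare_forecast_data(run_date, "AT_DE")
is_valid, errors = forecaster.validate_no_leakage(context_data, future_data, run_date)
```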
### Forecast Horizon
- **FIXED at 14 days** (D+1 to D+14, 336 hours)
- No horizon selector needed (always forecasts full 14 days)
- D+1 starts 1 hour after run_date (ET convention)
### Feature Availability
- **Load Forecasts**: Published day-ahead, available D+1 only
- **Weather**: Forecasts available for full D+14 horizon
- **CNEC Outages**: Planned maintenance published weeks ahead
- **LTA**: Long-term allocations, forward-filled from D+0
- **Historical**: Prices, generation, demand (context only)
### Time Conventions
- **Electricity Time (ET)**: Hour 1 = 00:00-01:00, Hour 24 = 23:00-00:00
- **D+1**: Next day, hours 1-24 (24 hours starting at 00:00)
- **D+14**: 14 days ahead (336 hours total)
### Model
- **Chronos-2 Large** (710M params, zero-shot inference)
- Supports partial availability via NaN masking
- Multivariate time series forecasting
### Files
- `src/forecasting/feature_availability.py`: Feature categorization
- `src/forecasting/dynamic_forecast.py`: Time-aware data extraction
- `smoke_test.py`, `full_inference.py`: Updated inference scripts
- `tests/test_feature_availability.py`: Unit tests (27 tests, all passing)
### Authors
Evgueni Poloukarov, 2025-11-13
""")
# Wire up the button
prepare_btn.click(
fn=prepare_forecast,
inputs=[run_date_input, border_dropdown],
outputs=[result_output, context_preview, future_preview]
)
return app
if __name__ == "__main__":
app = create_interface()
app.launch(
server_name="0.0.0.0",
server_port=7860,
share=False
)