Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Gradio Interface for Dynamic Forecast System | |
| Interactive interface for time-aware forecasting with run date selection. | |
| """ | |
| import os | |
| import gradio as gr | |
| import polars as pl | |
| import pandas as pd | |
| from datetime import datetime, timedelta | |
| from datasets import load_dataset | |
| from src.forecasting.dynamic_forecast import DynamicForecast | |
| from src.forecasting.feature_availability import FeatureAvailability | |
# Module-level caches, all unset until load_data() has run:
#   dataset    - the loaded feature table
#   forecaster - the DynamicForecast helper built on top of `dataset`
#   borders    - border names derived from the `target_border_*` columns
dataset = forecaster = borders = None
def load_data():
    """Download the feature dataset and fill the module-level caches.

    Reads the access token from the ``HF_TOKEN`` environment variable, loads
    the ``train`` split of ``evgueni-p/fbmc-features-24month``, converts it to
    a polars frame whose ``timestamp`` column is a datetime, builds the
    ``DynamicForecast`` helper and derives the border names from the
    ``target_border_*`` columns.

    Returns:
        True once ``dataset``, ``forecaster`` and ``borders`` have been set.

    Raises:
        ValueError: If ``HF_TOKEN`` is unset or empty.
    """
    global dataset, forecaster, borders

    print("[*] Loading dataset from HuggingFace...")

    # The token is required; fail early with an actionable message.
    token = os.getenv("HF_TOKEN")
    if not token:
        raise ValueError(
            "HF_TOKEN not found in environment variables. "
            "Please set HF_TOKEN in your environment or .env file."
        )

    hf_split = load_dataset(
        "evgueni-p/fbmc-features-24month",
        split="train",
        token=token,
    )
    dataset = pl.from_pandas(hf_split.to_pandas())

    # Normalise the timestamp column: parse strings, cast anything else
    # that is not already a datetime.
    ts_dtype = dataset['timestamp'].dtype
    if ts_dtype == pl.String:
        dataset = dataset.with_columns(pl.col('timestamp').str.to_datetime())
    elif ts_dtype != pl.Datetime:
        dataset = dataset.with_columns(pl.col('timestamp').cast(pl.Datetime))

    forecaster = DynamicForecast(
        dataset=dataset,
        context_hours=512,
        forecast_hours=336,  # Fixed at 14 days
    )

    # Border names are the target column names with the common prefix removed.
    prefix = 'target_border_'
    borders = [
        name.replace(prefix, '')
        for name in dataset.columns
        if name.startswith(prefix)
    ]

    print(f"[OK] Loaded {len(dataset)} rows, {len(dataset.columns)} columns")
    print(f"[OK] Found {len(borders)} borders")
    print(f"[OK] Date range: {dataset['timestamp'].min()} to {dataset['timestamp'].max()}")

    return True
def get_dataset_info():
    """Return a short text summary of the loaded dataset.

    Returns:
        ``"Dataset not loaded"`` when the module-level ``dataset`` is unset;
        otherwise a multi-line string with the row count, column count,
        timestamp range and number of borders.
    """
    if dataset is None:
        return "Dataset not loaded"

    timestamps = dataset['timestamp']
    date_min = str(timestamps.min())
    date_max = str(timestamps.max())

    # Leading and trailing empty entries keep the surrounding newlines.
    lines = [
        "",
        "**Dataset Information**",
        f"- Total rows: {len(dataset):,}",
        f"- Total columns: {len(dataset.columns)}",
        f"- Date range: {date_min} to {date_max}",
        f"- Borders available: {len(borders)}",
        "",
    ]
    return "\n".join(lines)
def get_feature_summary():
    """Return a text breakdown of feature counts by availability category.

    Returns:
        ``"Forecaster not initialized"`` when the module-level ``forecaster``
        is unset; otherwise a multi-line string built from the
        ``full_horizon_d14``, ``partial_d1``, ``historical`` and ``total``
        entries of ``forecaster.get_feature_summary()``.
    """
    if forecaster is None:
        return "Forecaster not initialized"

    counts = forecaster.get_feature_summary()

    # Leading and trailing empty entries keep the surrounding newlines.
    lines = [
        "",
        "**Feature Categorization**",
        f"- Full-horizon D+14: {counts['full_horizon_d14']} features",
        "(temporal, weather, CNEC outages, LTA)",
        f"- Partial D+1: {counts['partial_d1']} features",
        "(load forecasts, masked D+2-D+14)",
        f"- Historical only: {counts['historical']} features",
        "(prices, generation, demand, lags, etc.)",
        f"- **Total: {counts['total']} features**",
        "",
    ]
    return "\n".join(lines)
def validate_run_date(run_date_str):
    """Check that a run date parses and leaves room for context and horizon.

    Args:
        run_date_str: Run date formatted as ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is False when the value
        is empty, cannot be parsed, the dataset has not been loaded, or the
        date falls outside the window that leaves 512 hours of data before it
        and 336 hours after it.
    """
    if not run_date_str:
        return False, "Please select a run date"

    try:
        run_date = datetime.strptime(run_date_str, "%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError):
        # ValueError: wrong format; TypeError: a non-string value was passed.
        # Deliberately not a bare ``except`` so KeyboardInterrupt/SystemExit
        # still propagate.
        return False, "Invalid date format (use YYYY-MM-DD HH:MM:SS)"

    # Without a dataset there are no bounds to compare against.
    if dataset is None:
        return False, "Dataset not loaded"

    timestamps = dataset['timestamp']

    # Run date must have 512 hours of context before it
    min_valid = timestamps.min() + timedelta(hours=512)
    # Run date must have 336 hours of future data after it
    max_valid = timestamps.max() - timedelta(hours=336)

    if run_date < min_valid:
        return False, f"Run date too early (need 512h context). Minimum: {min_valid}"
    if run_date > max_valid:
        return False, f"Run date too late (need 336h future data). Maximum: {max_valid}"

    return True, "Run date valid"
def _format_preview(frame, rows=10):
    """Render the first ``rows`` rows of a data frame as plain text."""
    head = frame.head(rows)
    # Prefer a ``to_string()`` method when the frame offers one; fall back to
    # ``str()`` for frame types without it so the preview step cannot turn a
    # successful preparation into an error.
    to_string = getattr(head, "to_string", None)
    return to_string() if callable(to_string) else str(head)


def prepare_forecast(run_date_str, border):
    """Prepare forecast data for the selected run date and border.

    Args:
        run_date_str: Run date formatted as ``YYYY-MM-DD HH:MM:SS``.
        border: Border identifier passed to the forecaster.

    Returns:
        A ``(result, context_preview, future_preview)`` tuple of strings.
        On any failure (dataset not loaded, missing border, invalid run date,
        leakage detected, or an exception during preparation) ``result``
        carries the message and both previews are empty strings.
    """
    if dataset is None or forecaster is None:
        return "Error: Dataset not loaded", "", ""

    # Validate inputs
    if not border:
        return "Error: Please select a border", "", ""

    is_valid, msg = validate_run_date(run_date_str)
    if not is_valid:
        return f"Error: {msg}", "", ""

    # UI boundary: any failure below is reported as text, not raised.
    try:
        run_date = datetime.strptime(run_date_str, "%Y-%m-%d %H:%M:%S")

        # Prepare data
        context_data, future_data = forecaster.prepare_forecast_data(run_date, border)

        # Validate no leakage (separate names so the input-validation result
        # above is not overwritten)
        leak_free, leak_errors = forecaster.validate_no_leakage(
            context_data, future_data, run_date
        )
        if not leak_free:
            error_msg = "Data leakage detected:\n" + "\n".join(
                f"- {e}" for e in leak_errors
            )
            return error_msg, "", ""

        # The horizon is 336 hourly steps: the first is one hour after the
        # run date, the last 335 hours after the first.
        forecast_start = run_date + timedelta(hours=1)
        forecast_end = forecast_start + timedelta(hours=335)

        result = f"""
**Forecast Configuration**
- Border: {border}
- Run date: {run_date}
- Forecast horizon: D+1 to D+14 (336 hours, FIXED)
- Forecast period: {forecast_start} to {forecast_end}
**Data Preparation Summary**
- Context shape: {context_data.shape} (historical data)
- Future shape: {future_data.shape} (future covariates)
- Context dates: {context_data['timestamp'].min()} to {context_data['timestamp'].max()}
- Future dates: {future_data['timestamp'].min()} to {future_data['timestamp'].max()}
- Leakage validation: PASSED
**Feature Availability**
- Full-horizon D+14: Available for all 336 hours
- Partial D+1 (load forecasts): Available for first 24 hours, masked 25-336
- Historical features: Not used for forecasting (context only)
**Next Steps**
1. Data has been prepared with time-aware extraction
2. Load forecast masking applied (D+1 only)
3. LTA forward-filling applied (constant across horizon)
4. Ready for Chronos-2 inference (requires GPU)
**Note**: This is a dry-run demonstration. Actual inference requires GPU with Chronos-2 model.
"""

        context_preview = _format_preview(context_data)
        future_preview = _format_preview(future_data)

        return result, context_preview, future_preview
    except Exception as e:
        return f"Error: {e}", "", ""
def create_interface():
    """Create Gradio interface.

    Calls ``load_data()`` first, so the dataset is downloaded before any
    widget is built, then assembles a ``gr.Blocks`` app with three tabs
    ("Forecast Configuration", "Data Preview", "About") and wires the
    "Prepare Forecast Data" button to ``prepare_forecast``.

    Returns:
        The ``gr.Blocks`` application object (not yet launched).
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a flattened copy of this file — confirm against the
    # intended layout (in particular which container holds the button).
    # Load data at startup
    load_data()
    with gr.Blocks(title="FBMC Dynamic Forecast System") as app:
        gr.Markdown("# FBMC Dynamic Forecast System")
        gr.Markdown("""
**Time-Aware Forecasting with Run Date Selection**
This interface demonstrates the dynamic forecast pipeline that prevents data leakage
by using only data available at the selected run date.
**Key Features**:
- Dynamic run date selection (prevents data leakage)
- Fixed 14-day forecast horizon (D+1 to D+14, always 336 hours)
- Time-aware feature categorization (603 full + 12 partial + 1,899 historical)
- Availability masking for partial features (load forecasts D+1 only)
- Built-in leakage validation
""")
        with gr.Tab("Forecast Configuration"):
            with gr.Row():
                # Left column: read-only summaries computed once at build time.
                with gr.Column():
                    gr.Markdown("### Dataset Information")
                    dataset_info = gr.Textbox(
                        label="Dataset Info",
                        value=get_dataset_info(),
                        lines=8,
                        interactive=False
                    )
                    feature_summary = gr.Textbox(
                        label="Feature Summary",
                        value=get_feature_summary(),
                        lines=10,
                        interactive=False
                    )
                # Right column: the two user inputs and the action button.
                with gr.Column():
                    gr.Markdown("### Forecast Configuration")
                    run_date_input = gr.Textbox(
                        label="Run Date (YYYY-MM-DD HH:MM:SS)",
                        placeholder="2025-08-15 23:00:00",
                        value="2025-08-15 23:00:00"
                    )
                    # Defaults to the first border; empty when none were found.
                    border_dropdown = gr.Dropdown(
                        label="Border",
                        choices=borders if borders else [],
                        value=borders[0] if borders else None
                    )
                    gr.Markdown("""
**Forecast Horizon**: Fixed at 14 days (D+1 to D+14, 336 hours)
**Validation Rules**:
- Run date must have 512 hours of historical context
- Run date must have 336 hours of future data (for this demo)
- Valid range: ~22 days from dataset start to ~14 days before dataset end
""")
                    prepare_btn = gr.Button("Prepare Forecast Data", variant="primary")
            with gr.Row():
                result_output = gr.Textbox(
                    label="Forecast Preparation Result",
                    lines=25,
                    interactive=False
                )
        with gr.Tab("Data Preview"):
            with gr.Row():
                context_preview = gr.Textbox(
                    label="Context Data (first 10 rows)",
                    lines=20,
                    interactive=False
                )
                future_preview = gr.Textbox(
                    label="Future Covariates (first 10 rows)",
                    lines=20,
                    interactive=False
                )
        with gr.Tab("About"):
            gr.Markdown("""
## About This System
### Purpose
Prevent data leakage in FBMC cross-border flow forecasting by implementing
time-aware data extraction that respects feature availability windows.
### Architecture
1. **Feature Categorization**: All 2,514 features categorized by availability
- Full-horizon D+14: 603 features (temporal, weather, outages, LTA)
- Partial D+1: 12 features (load forecasts, masked D+2-D+14)
- Historical: 1,899 features (prices, generation, demand, lags)
2. **Time-Aware Extraction**: DynamicForecast class
- Extracts context data (all data before run_date)
- Extracts future covariates (D+1 to D+14 only)
- Applies availability masking for partial features
3. **Leakage Validation**: Built-in checks
- Context timestamps < run_date
- Future timestamps >= run_date + 1 hour
- No overlap between context and future
- Only future covariates in future data
### Forecast Horizon
- **FIXED at 14 days** (D+1 to D+14, 336 hours)
- No horizon selector needed (always forecasts full 14 days)
- D+1 starts 1 hour after run_date (ET convention)
### Feature Availability
- **Load Forecasts**: Published day-ahead, available D+1 only
- **Weather**: Forecasts available for full D+14 horizon
- **CNEC Outages**: Planned maintenance published weeks ahead
- **LTA**: Long-term allocations, forward-filled from D+0
- **Historical**: Prices, generation, demand (context only)
### Time Conventions
- **Electricity Time (ET)**: Hour 1 = 00:00-01:00, Hour 24 = 23:00-00:00
- **D+1**: Next day, hours 1-24 (24 hours starting at 00:00)
- **D+14**: 14 days ahead (336 hours total)
### Model
- **Chronos 2 Large** (710M params, zero-shot inference)
- Supports partial availability via NaN masking
- Multivariate time series forecasting
### Files
- `src/forecasting/feature_availability.py`: Feature categorization
- `src/forecasting/dynamic_forecast.py`: Time-aware data extraction
- `smoke_test.py`, `full_inference.py`: Updated inference scripts
- `tests/test_feature_availability.py`: Unit tests (27 tests, all passing)
### Authors
Evgueni Poloukarov, 2025-11-13
""")
        # Wire up the button: the three strings returned by prepare_forecast
        # map, in order, onto the result box and the two preview boxes.
        prepare_btn.click(
            fn=prepare_forecast,
            inputs=[run_date_input, border_dropdown],
            outputs=[result_output, context_preview, future_preview]
        )
    return app
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it with a fixed
    # host and port and without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    app = create_interface()
    app.launch(**launch_options)