Spaces:

evgueni-p
/

fbmc-chronos2

Sleeping

fbmc-chronos2 / gradio_app.py

Evgueni Poloukarov

fix: replace hardcoded HF token with environment variable for security

f2524dc 29 days ago

12.9 kB

	#!/usr/bin/env python3
	"""
	Gradio Interface for Dynamic Forecast System
	Interactive interface for time-aware forecasting with run date selection.
	"""

	import os
	import gradio as gr
	import polars as pl
	import pandas as pd
	from datetime import datetime, timedelta
	from datasets import load_dataset
	from src.forecasting.dynamic_forecast import DynamicForecast
	from src.forecasting.feature_availability import FeatureAvailability

	# Module-level cache shared by the functions in this file.
	# All three start as None and are assigned by load_data().
	dataset = None  # Polars DataFrame built from the HuggingFace dataset
	forecaster = None  # DynamicForecast instance constructed over `dataset`
	borders = None  # names of `target_border_*` columns with that prefix removed

	def load_data():
	    """Fetch the feature dataset and populate the module-level cache.

	    Reads the HuggingFace token from the ``HF_TOKEN`` environment variable,
	    downloads the ``train`` split of ``evgueni-p/fbmc-features-24month``,
	    converts it to a Polars DataFrame, makes sure the ``timestamp`` column
	    is a datetime, builds the ``DynamicForecast`` helper and derives the
	    list of border names from the ``target_border_*`` columns.

	    Returns:
	        True once ``dataset``, ``forecaster`` and ``borders`` are set.

	    Raises:
	        ValueError: If ``HF_TOKEN`` is unset or empty.
	    """
	    global dataset, forecaster, borders

	    print("[*] Loading dataset from HuggingFace...")

	    # The token must come from the environment; it is never hardcoded.
	    token = os.getenv("HF_TOKEN")
	    if not token:
	        message = (
	            "HF_TOKEN not found in environment variables. "
	            "Please set HF_TOKEN in your environment or .env file."
	        )
	        raise ValueError(message)

	    hf_split = load_dataset(
	        "evgueni-p/fbmc-features-24month", split="train", token=token
	    )
	    dataset = pl.from_pandas(hf_split.to_pandas())

	    # Normalise the timestamp column to a datetime dtype.
	    ts_dtype = dataset['timestamp'].dtype
	    if ts_dtype == pl.String:
	        dataset = dataset.with_columns(pl.col('timestamp').str.to_datetime())
	    elif ts_dtype != pl.Datetime:
	        dataset = dataset.with_columns(pl.col('timestamp').cast(pl.Datetime))

	    # 512 hours of context, 336 hours (14 days) of forecast horizon.
	    forecaster = DynamicForecast(
	        dataset=dataset, context_hours=512, forecast_hours=336
	    )

	    # Border names are the target column names minus their common marker.
	    marker = 'target_border_'
	    borders = [
	        name.replace(marker, '')
	        for name in dataset.columns
	        if name.startswith(marker)
	    ]

	    n_rows = len(dataset)
	    n_cols = len(dataset.columns)
	    print(f"[OK] Loaded {n_rows} rows, {n_cols} columns")
	    print(f"[OK] Found {len(borders)} borders")
	    print(f"[OK] Date range: {dataset['timestamp'].min()} to {dataset['timestamp'].max()}")

	    return True


	def get_dataset_info():
	    """Describe the cached dataset as display text.

	    Returns:
	        ``"Dataset not loaded"`` while the module-level ``dataset`` is None;
	        otherwise a multi-line string with row count, column count,
	        timestamp range and number of borders.
	    """
	    if dataset is None:
	        return "Dataset not loaded"

	    timestamps = dataset['timestamp']
	    first_ts = str(timestamps.min())
	    last_ts = str(timestamps.max())

	    # Empty first and last entries reproduce the leading newline and the
	    # trailing newline + tab of the displayed text.
	    pieces = [
	        "",
	        "Dataset Information",
	        f"- Total rows: {len(dataset):,}",
	        f"- Total columns: {len(dataset.columns)}",
	        f"- Date range: {first_ts} to {last_ts}",
	        f"- Borders available: {len(borders)}",
	        "",
	    ]
	    return "\n\t".join(pieces)


	def get_feature_summary():
	    """Describe the forecaster's feature categories as display text.

	    Returns:
	        ``"Forecaster not initialized"`` while the module-level
	        ``forecaster`` is None; otherwise a multi-line string built from
	        the ``full_horizon_d14``, ``partial_d1``, ``historical`` and
	        ``total`` entries of ``forecaster.get_feature_summary()``.
	    """
	    if forecaster is None:
	        return "Forecaster not initialized"

	    counts = forecaster.get_feature_summary()

	    # Empty first and last entries reproduce the leading newline and the
	    # trailing newline + tab of the displayed text.
	    pieces = [
	        "",
	        "Feature Categorization",
	        f"- Full-horizon D+14: {counts['full_horizon_d14']} features",
	        "(temporal, weather, CNEC outages, LTA)",
	        f"- Partial D+1: {counts['partial_d1']} features",
	        "(load forecasts, masked D+2-D+14)",
	        f"- Historical only: {counts['historical']} features",
	        "(prices, generation, demand, lags, etc.)",
	        f"- Total: {counts['total']} features",
	        "",
	    ]
	    return "\n\t".join(pieces)


	def validate_run_date(run_date_str):
	    """Check that a run date parses and leaves enough data on both sides.

	    The run date needs 512 hours of history before it and 336 hours of
	    data after it within the module-level ``dataset``.

	    Args:
	        run_date_str: Run date as text in ``YYYY-MM-DD HH:MM:SS`` form.

	    Returns:
	        A ``(is_valid, message)`` tuple. ``message`` explains the failure,
	        or is ``"Run date valid"`` on success. The function reports
	        problems through the tuple rather than raising.
	    """
	    context_hours = 512   # history required before the run date
	    forecast_hours = 336  # future data required after the run date

	    if not run_date_str:
	        return False, "Please select a run date"

	    try:
	        run_date = datetime.strptime(run_date_str, "%Y-%m-%d %H:%M:%S")
	    except (ValueError, TypeError):
	        # ValueError: text does not match the format.
	        # TypeError: the value is not a string at all.
	        # Anything else (e.g. KeyboardInterrupt) is deliberately not caught.
	        return False, "Invalid date format (use YYYY-MM-DD HH:MM:SS)"

	    # Bounds can only be checked once the dataset has been loaded.
	    if dataset is None:
	        return False, "Dataset not loaded"

	    dataset_min = dataset['timestamp'].min()
	    dataset_max = dataset['timestamp'].max()
	    # NOTE(review): min()/max() are expected to give None when the column
	    # has no values; guard so the arithmetic below cannot fail on None.
	    if dataset_min is None or dataset_max is None:
	        return False, "Dataset has no timestamps"

	    min_valid = dataset_min + timedelta(hours=context_hours)
	    max_valid = dataset_max - timedelta(hours=forecast_hours)

	    if run_date < min_valid:
	        return False, (
	            f"Run date too early (need {context_hours}h context). "
	            f"Minimum: {min_valid}"
	        )

	    if run_date > max_valid:
	        return False, (
	            f"Run date too late (need {forecast_hours}h future data). "
	            f"Maximum: {max_valid}"
	        )

	    return True, "Run date valid"


	def prepare_forecast(run_date_str, border):
	    """Build the forecast inputs for one run date and border (dry run).

	    Args:
	        run_date_str: Run date as text in ``YYYY-MM-DD HH:MM:SS`` form.
	        border: Border name selected in the UI.

	    Returns:
	        A 3-tuple ``(report, context_preview, future_preview)``. On any
	        failure the report holds the error text and both previews are
	        empty strings.
	    """
	    def _fail(message):
	        # Every failure path returns the message with two empty previews.
	        return message, "", ""

	    if dataset is None or forecaster is None:
	        return _fail("Error: Dataset not loaded")

	    if not border:
	        return _fail("Error: Please select a border")

	    date_ok, date_msg = validate_run_date(run_date_str)
	    if not date_ok:
	        return _fail(f"Error: {date_msg}")

	    try:
	        run_date = datetime.strptime(run_date_str, "%Y-%m-%d %H:%M:%S")

	        # Time-aware extraction of history and future covariates.
	        context_data, future_data = forecaster.prepare_forecast_data(run_date, border)

	        # Refuse to continue if the forecaster reports leakage.
	        leak_free, leak_errors = forecaster.validate_no_leakage(
	            context_data, future_data, run_date
	        )
	        if not leak_free:
	            bullets = [f"- {err}" for err in leak_errors]
	            return _fail("Data leakage detected:\n" + "\n".join(bullets))

	        # The horizon starts one hour after the run date and spans 336 hours.
	        forecast_start = run_date + timedelta(hours=1)
	        forecast_end = forecast_start + timedelta(hours=335)

	        summary = f"""
	Forecast Configuration
	- Border: {border}
	- Run date: {run_date}
	- Forecast horizon: D+1 to D+14 (336 hours, FIXED)
	- Forecast period: {forecast_start} to {forecast_end}

	Data Preparation Summary
	- Context shape: {context_data.shape} (historical data)
	- Future shape: {future_data.shape} (future covariates)
	- Context dates: {context_data['timestamp'].min()} to {context_data['timestamp'].max()}
	- Future dates: {future_data['timestamp'].min()} to {future_data['timestamp'].max()}
	- Leakage validation: PASSED

	Feature Availability
	- Full-horizon D+14: Available for all 336 hours
	- Partial D+1 (load forecasts): Available for first 24 hours, masked 25-336
	- Historical features: Not used for forecasting (context only)

	Next Steps
	1. Data has been prepared with time-aware extraction
	2. Load forecast masking applied (D+1 only)
	3. LTA forward-filling applied (constant across horizon)
	4. Ready for Chronos-2 inference (requires GPU)

	Note: This is a dry-run demonstration. Actual inference requires GPU with Chronos-2 model.
	"""

	        # Text previews of the first ten rows of each frame.
	        context_text = context_data.head(10).to_string()
	        future_text = future_data.head(10).to_string()
	    except Exception as exc:
	        # UI boundary: show the problem in the result box instead of raising.
	        return _fail("Error: " + str(exc))

	    return summary, context_text, future_text


	def create_interface():
	    """Build the Gradio Blocks application for the forecast demo.

	    Calls load_data() first, so any exception it raises propagates from
	    here. The UI has three tabs ("Forecast Configuration", "Data Preview",
	    "About"); the "Prepare Forecast Data" button runs prepare_forecast()
	    with the run-date text and selected border, and writes its three
	    return values to the result box and the two preview boxes.

	    Returns:
	        The object bound by ``with gr.Blocks(...) as app``. It is not
	        launched here.
	    """
	    # NOTE(review): the nesting of the layout containers below was
	    # reconstructed from the statement order -- confirm against the
	    # rendered UI.

	    # Load data at startup
	    load_data()

	    with gr.Blocks(title="FBMC Dynamic Forecast System") as app:
	        gr.Markdown("# FBMC Dynamic Forecast System")
	        # NOTE(review): the feature counts in this text are literals, not
	        # values read from get_feature_summary().
	        gr.Markdown("""
	Time-Aware Forecasting with Run Date Selection

	This interface demonstrates the dynamic forecast pipeline that prevents data leakage
	by using only data available at the selected run date.

	Key Features:
	- Dynamic run date selection (prevents data leakage)
	- Fixed 14-day forecast horizon (D+1 to D+14, always 336 hours)
	- Time-aware feature categorization (603 full + 12 partial + 1,899 historical)
	- Availability masking for partial features (load forecasts D+1 only)
	- Built-in leakage validation
	""")

	        with gr.Tab("Forecast Configuration"):
	            with gr.Row():
	                # Left column: read-only information computed once, when
	                # the interface is built.
	                with gr.Column():
	                    gr.Markdown("### Dataset Information")
	                    dataset_info = gr.Textbox(
	                        label="Dataset Info",
	                        value=get_dataset_info(),
	                        lines=8,
	                        interactive=False
	                    )

	                    feature_summary = gr.Textbox(
	                        label="Feature Summary",
	                        value=get_feature_summary(),
	                        lines=10,
	                        interactive=False
	                    )

	                # Right column: user inputs for the forecast run.
	                with gr.Column():
	                    gr.Markdown("### Forecast Configuration")

	                    # NOTE(review): the default run date is a literal; it is
	                    # not checked against the loaded dataset's range here.
	                    run_date_input = gr.Textbox(
	                        label="Run Date (YYYY-MM-DD HH:MM:SS)",
	                        placeholder="2025-08-15 23:00:00",
	                        value="2025-08-15 23:00:00"
	                    )

	                    # Falls back to an empty dropdown when no borders exist.
	                    border_dropdown = gr.Dropdown(
	                        label="Border",
	                        choices=borders if borders else [],
	                        value=borders[0] if borders else None
	                    )

	                    gr.Markdown("""
	Forecast Horizon: Fixed at 14 days (D+1 to D+14, 336 hours)

	Validation Rules:
	- Run date must have 512 hours of historical context
	- Run date must have 336 hours of future data (for this demo)
	- Valid range: ~22 days from dataset start to ~14 days before dataset end
	""")

	                    prepare_btn = gr.Button("Prepare Forecast Data", variant="primary")

	            # Output area for the text report returned by prepare_forecast().
	            with gr.Row():
	                result_output = gr.Textbox(
	                    label="Forecast Preparation Result",
	                    lines=25,
	                    interactive=False
	                )

	        with gr.Tab("Data Preview"):
	            with gr.Row():
	                context_preview = gr.Textbox(
	                    label="Context Data (first 10 rows)",
	                    lines=20,
	                    interactive=False
	                )

	                future_preview = gr.Textbox(
	                    label="Future Covariates (first 10 rows)",
	                    lines=20,
	                    interactive=False
	                )

	        # Static documentation tab.
	        with gr.Tab("About"):
	            gr.Markdown("""
	## About This System

	### Purpose
	Prevent data leakage in FBMC cross-border flow forecasting by implementing
	time-aware data extraction that respects feature availability windows.

	### Architecture
	1. Feature Categorization: All 2,514 features categorized by availability
	- Full-horizon D+14: 603 features (temporal, weather, outages, LTA)
	- Partial D+1: 12 features (load forecasts, masked D+2-D+14)
	- Historical: 1,899 features (prices, generation, demand, lags)

	2. Time-Aware Extraction: DynamicForecast class
	- Extracts context data (all data before run_date)
	- Extracts future covariates (D+1 to D+14 only)
	- Applies availability masking for partial features

	3. Leakage Validation: Built-in checks
	- Context timestamps < run_date
	- Future timestamps >= run_date + 1 hour
	- No overlap between context and future
	- Only future covariates in future data

	### Forecast Horizon
	- FIXED at 14 days (D+1 to D+14, 336 hours)
	- No horizon selector needed (always forecasts full 14 days)
	- D+1 starts 1 hour after run_date (ET convention)

	### Feature Availability
	- Load Forecasts: Published day-ahead, available D+1 only
	- Weather: Forecasts available for full D+14 horizon
	- CNEC Outages: Planned maintenance published weeks ahead
	- LTA: Long-term allocations, forward-filled from D+0
	- Historical: Prices, generation, demand (context only)

	### Time Conventions
	- Electricity Time (ET): Hour 1 = 00:00-01:00, Hour 24 = 23:00-00:00
	- D+1: Next day, hours 1-24 (24 hours starting at 00:00)
	- D+14: 14 days ahead (336 hours total)

	### Model
	- Chronos 2 Large (710M params, zero-shot inference)
	- Supports partial availability via NaN masking
	- Multivariate time series forecasting

	### Files
	- `src/forecasting/feature_availability.py`: Feature categorization
	- `src/forecasting/dynamic_forecast.py`: Time-aware data extraction
	- `smoke_test.py`, `full_inference.py`: Updated inference scripts
	- `tests/test_feature_availability.py`: Unit tests (27 tests, all passing)

	### Authors
	Evgueni Poloukarov, 2025-11-13
	""")

	        # Wire up the button: the three values returned by
	        # prepare_forecast() map, in order, onto the three outputs.
	        prepare_btn.click(
	            fn=prepare_forecast,
	            inputs=[run_date_input, border_dropdown],
	            outputs=[result_output, context_preview, future_preview]
	        )

	    return app


	if __name__ == "__main__":
	    # Script entry point: build the UI (which also loads the dataset),
	    # then serve it on 0.0.0.0:7860 with sharing turned off.
	    app = create_interface()
	    app.launch(server_name="0.0.0.0", server_port=7860, share=False)