fbmc-chronos2 / scripts /validate_jao_features.py
Evgueni Poloukarov
feat: Phase 1 complete - Master CNEC list + synchronized feature engineering
d4939ce
"""Validate JAO feature engineering results with master 176 CNECs."""
import polars as pl
from pathlib import Path
# ---------------------------------------------------------------------------
# Read the engineered feature table from disk.
# ---------------------------------------------------------------------------
features_path = Path('data/processed/features_jao_24month.parquet')
features = pl.read_parquet(features_path)

# Fetch the shape and column list once; every report line below reuses them.
row_total, column_total = features.shape
column_names = features.columns
banner = "=" * 80

print(banner)
print("JAO FEATURE VALIDATION - MASTER 176 CNEC LIST")
print(banner)
print(f"\nTotal columns: {column_total}")
print(f"Total rows: {row_total:,}")

# ---------------------------------------------------------------------------
# Per-category column counts.
#
# Each category label maps to either a single name prefix or a list of
# prefixes; a column belongs to the category when its name starts with any
# of them. Dict order is the order the report lines are printed in.
# ---------------------------------------------------------------------------
print("\nFeature breakdown by category:")
categories = {
    'Tier-1 CNEC': 'cnec_t1_',
    'Tier-2 CNEC': 'cnec_t2_',
    'PTDF': 'ptdf_',
    'LTA': 'lta_',
    'NetPos (min/max)': ['min', 'max'],
    'Border (MaxBEX)': 'border_',
    'Temporal': ['hour', 'day', 'month', 'weekday', 'year', 'is_weekend'],
    'Target': 'target_'
}

total_features = 0
for label, spec in categories.items():
    # Normalise to a tuple so one str.startswith call tests every prefix.
    prefix_group = (spec,) if isinstance(spec, str) else tuple(spec)
    matched = sum(1 for name in column_names if name.startswith(prefix_group))
    if matched <= 0:
        # Categories with no matching columns are left out of the report.
        continue
    print(f" {label:<25s}: {matched:>4d} features")
    total_features += matched

# The 'Target' category was tallied in the loop above, so take the target
# columns back out before reporting the feature total.
target_count = sum(1 for name in column_names if name.startswith('target_'))
print(f"\n Total features (excl mtu): {total_features - target_count}")
print(f" Target variables: {target_count}")
print(banner)