"""Validate JAO feature engineering results with master 176 CNECs.

Loads the engineered feature table from ``FEATURES_PATH`` and prints the
table dimensions plus a per-category column count, where a column's category
is decided purely by its name prefix (see ``FEATURE_CATEGORIES``).

Run as a script; importing the module has no side effects.
"""

from pathlib import Path
from typing import Dict, Iterable, Mapping, Union

# Location of the engineered feature table read by ``main``.
FEATURES_PATH = Path('data/processed/features_jao_24month.parquet')

# Display label -> column-name prefix (or list of prefixes) that places a
# column in that category. Insertion order is the order used in the report.
FEATURE_CATEGORIES = {
    'Tier-1 CNEC': 'cnec_t1_',
    'Tier-2 CNEC': 'cnec_t2_',
    'PTDF': 'ptdf_',
    'LTA': 'lta_',
    'NetPos (min/max)': ['min', 'max'],
    'Border (MaxBEX)': 'border_',
    'Temporal': ['hour', 'day', 'month', 'weekday', 'year', 'is_weekend'],
    'Target': 'target_',
}

# Columns with this prefix are reported as targets and excluded from the
# feature total.
TARGET_PREFIX = 'target_'


def count_features_by_category(
    columns: Iterable[str],
    categories: Mapping[str, Union[str, Iterable[str]]],
) -> Dict[str, int]:
    """Count how many column names fall into each prefix-defined category.

    Args:
        columns: Column names to classify. Consumed once, so any iterable
            (including a generator) is accepted.
        categories: Mapping of category label to either a single prefix
            string or an iterable of prefix strings.

    Returns:
        A dict mapping every label in ``categories`` (in the same order) to
        the number of columns starting with at least one of that category's
        prefixes. Categories with no matching column map to ``0``.

    Note:
        Categories are counted independently: a column whose name matches
        prefixes of two different categories is counted in both.
    """
    names = list(columns)
    counts: Dict[str, int] = {}
    for label, prefixes in categories.items():
        # str.startswith accepts a tuple of prefixes, so normalise to one.
        prefix_tuple = (prefixes,) if isinstance(prefixes, str) else tuple(prefixes)
        counts[label] = sum(1 for name in names if name.startswith(prefix_tuple))
    return counts


def main() -> None:
    """Load the feature table and print the validation report to stdout."""
    # Imported here rather than at module level so the pure counting helper
    # above stays importable in environments without polars.
    import polars as pl

    features = pl.read_parquet(FEATURES_PATH)
    columns = features.columns

    print("=" * 80)
    print("JAO FEATURE VALIDATION - MASTER 176 CNEC LIST")
    print("=" * 80)
    print(f"\nTotal columns: {features.shape[1]}")
    print(f"Total rows: {features.shape[0]:,}")

    # Feature breakdown by prefix; empty categories are not listed.
    print("\nFeature breakdown by category:")
    counts = count_features_by_category(columns, FEATURE_CATEGORIES)
    for label, count in counts.items():
        if count > 0:
            print(f" {label:<25s}: {count:>4d} features")
    total_features = sum(counts.values())

    # The category total includes target columns; subtract them so the
    # reported feature count covers model inputs only.
    target_count = sum(1 for name in columns if name.startswith(TARGET_PREFIX))
    print(f"\n Total features (excl mtu): {total_features - target_count}")
    print(f" Target variables: {target_count}")
    print("=" * 80)


if __name__ == "__main__":
    main()