File size: 1,381 Bytes
d4939ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""Validate JAO feature engineering results with master 176 CNECs."""
import polars as pl
from pathlib import Path

# Read the engineered feature table from its parquet file.
features_path = Path('data/processed/features_jao_24month.parquet')
features = pl.read_parquet(features_path)

# Pull the dimensions and column names once; every report line below reuses them.
n_rows, n_cols = features.shape
column_names = features.columns
banner = "=" * 80

print(banner)
print("JAO FEATURE VALIDATION - MASTER 176 CNEC LIST")
print(banner)
print(f"\nTotal columns: {n_cols}")
print(f"Total rows: {n_rows:,}")

# Report label -> column-name prefix (a single string) or list of prefixes.
# A column is counted under a label when its name starts with any of them.
print("\nFeature breakdown by category:")
categories = {
    'Tier-1 CNEC': 'cnec_t1_',
    'Tier-2 CNEC': 'cnec_t2_',
    'PTDF': 'ptdf_',
    'LTA': 'lta_',
    'NetPos (min/max)': ['min', 'max'],
    'Border (MaxBEX)': 'border_',
    'Temporal': ['hour', 'day', 'month', 'weekday', 'year', 'is_weekend'],
    'Target': 'target_'
}

total_features = 0
for cat_name, prefixes in categories.items():
    # Treat a bare string as a one-element prefix list.
    prefixes = [prefixes] if isinstance(prefixes, str) else prefixes

    # str.startswith accepts a tuple of alternatives, so one call per column
    # covers every prefix of the category.
    accepted = tuple(prefixes)
    count = sum(name.startswith(accepted) for name in column_names)
    if not count:
        # Categories without any matching column are left out of the report.
        continue

    print(f"  {cat_name:<25s}: {count:>4d} features")
    total_features += count

# The running total includes the 'target_' columns (when present); take them
# back out so the printed feature total covers non-target columns only.
target_count = sum(name.startswith('target_') for name in column_names)
print(f"\n  Total features (excl mtu): {total_features - target_count}")
print(f"  Target variables:          {target_count}")

print(banner)