# fbmc-chronos2/src/utils/border_extraction.py
# Evgueni Poloukarov
# feat: Phase 1 complete - Master CNEC list + synchronized feature engineering
# d4939ce
"""
CNEC Border Extraction Utility
================================
Extracts commercial border information from CNEC EIC codes, TSO fields,
and PTDF profiles using a hierarchical approach.
Strategy:
1. Parse EIC codes (10T-XX-YY-NNNNNN format) - Primary, 33% coverage
2. Special case mapping (Alegro CNECs) - 8 CNECs
3. TSO + neighbor PTDF analysis - Fallback, ~67% coverage
4. TSO-only guess (first alphabetical neighbor of the TSO's zone); "UNKNOWN" if that fails too - review these cases manually
Author: Claude + Evgueni Poloukarov
Date: 2025-11-08
"""
import math
from collections import Counter
from typing import Dict, Optional
# Lookup table: TSO name -> two-letter country/zone code.
TSO_TO_ZONE: Dict[str, str] = {
    # Germany is operated by four TSOs that all map to the same zone.
    '50Hertz': 'DE',
    'Amprion': 'DE',
    'TennetGmbh': 'DE',
    'TransnetBw': 'DE',

    # Every other zone is represented by a single TSO.
    'Rte': 'FR',             # France
    'Elia': 'BE',            # Belgium
    'TennetBv': 'NL',        # Netherlands
    'Apg': 'AT',             # Austria
    'Ceps': 'CZ',            # Czech Republic
    'Pse': 'PL',             # Poland
    'Mavir': 'HU',           # Hungary
    'Seps': 'SK',            # Slovakia
    'Transelectrica': 'RO',  # Romania
    'Hops': 'HR',            # Croatia
    'Eles': 'SI',            # Slovenia
}
# Adjacency table of FBMC borders (taken from the ENTSO-E BORDERS list):
# zone code -> zones it shares a border with.
# The order inside each list is significant for callers that break ties by
# position, so it must not be re-sorted.
ZONE_NEIGHBORS: Dict[str, list] = {
    # DE_LU is treated as DE
    'DE': ['NL', 'FR', 'BE', 'AT', 'CZ', 'PL'],
    # ES and CH are external zones but still affect FBMC
    'FR': ['DE', 'BE', 'ES', 'CH'],
    'AT': ['DE', 'CZ', 'HU', 'SI', 'CH'],
    'CZ': ['DE', 'AT', 'SK', 'PL'],
    'HU': ['AT', 'SK', 'RO', 'HR'],
    'SK': ['CZ', 'HU', 'PL'],
    'PL': ['DE', 'CZ', 'SK'],
    'RO': ['HU'],
    'HR': ['HU', 'SI'],
    'SI': ['AT', 'HR'],
    'BE': ['DE', 'FR', 'NL'],
    'NL': ['DE', 'BE'],
}
# Identifiers that do not follow the 10T-XX-YY pattern but whose border is
# known (Alegro cable + edge cases).
# Alegro is a DC cable between Belgium and Germany, so every Alegro
# identifier resolves to the same border.
_ALEGRO_BORDER = 'BE_DE'

SPECIAL_BORDER_MAPPING: Dict[str, str] = {
    # External
    'ALEGRO_EXTERNAL_BE_IMPORT': _ALEGRO_BORDER,
    'ALEGRO_EXTERNAL_DE_EXPORT': _ALEGRO_BORDER,
    'ALEGRO_EXTERNAL_DE_IMPORT': _ALEGRO_BORDER,
    'ALEGRO_EXTERNAL_BE_EXPORT': _ALEGRO_BORDER,
    # Internal
    'ALEGRO_INTERNAL_DE_IMPORT': _ALEGRO_BORDER,
    'ALEGRO_INTERNAL_BE_EXPORT': _ALEGRO_BORDER,
    'ALEGRO_INTERNAL_BE_IMPORT': _ALEGRO_BORDER,
    'ALEGRO_INTERNAL_DE_EXPORT': _ALEGRO_BORDER,
}
def extract_border_from_eic(eic: str) -> Optional[str]:
    """
    Extract border from EIC code with 10T-XX-YY-NNNNNN format.

    This is the most reliable method as border is explicitly encoded.

    Args:
        eic: CNEC EIC code

    Returns:
        Border string with the two zone codes in alphabetical order
        (e.g., "DE_FR", "AT_SI"), or None when the input is not a string,
        does not start with "10T-", or does not contain two non-empty
        alphabetic zone codes after the prefix.

    Examples:
        >>> extract_border_from_eic("10T-DE-FR-000068")
        'DE_FR'
        >>> extract_border_from_eic("10T-SI-AT-00003P")
        'AT_SI'
        >>> extract_border_from_eic("17T0000000215642") is None
        True
        >>> extract_border_from_eic("10T-DE-") is None
        True
    """
    # Non-string input (e.g. None from a missing cell) is "not parseable",
    # not an error: the return type already allows None.
    if not isinstance(eic, str) or not eic.startswith('10T-'):
        return None

    parts = eic.split('-')
    if len(parts) < 3:
        return None

    zone1, zone2 = parts[1], parts[2]

    # Reject truncated/malformed codes such as "10T-DE-" or "10T--FR-000068",
    # which would otherwise produce borders like "_DE" or "_FR".
    # (str.isalpha() is False for the empty string.)
    if not (zone1.isalpha() and zone2.isalpha()):
        return None

    # Normalize to alphabetical order for consistency
    return f"{min(zone1, zone2)}_{max(zone1, zone2)}"
def get_special_border(eic: str) -> Optional[str]:
    """
    Look up the border of a special-case CNEC (Alegro cable, etc.).

    The lookup is an exact match against the keys of
    SPECIAL_BORDER_MAPPING.

    Args:
        eic: CNEC EIC code

    Returns:
        The mapped border string, or None when the identifier is not a
        registered special case
    """
    if eic in SPECIAL_BORDER_MAPPING:
        return SPECIAL_BORDER_MAPPING[eic]
    return None
def infer_border_from_tso_and_ptdf(
    tso: str,
    ptdf_dict: Dict[str, float]
) -> Optional[str]:
    """
    Infer border using TSO home zone + highest PTDF in neighbor zones.

    This is a fallback method when EIC doesn't encode border explicitly.
    Uses TSO to identify home country, then finds neighbor with highest
    |PTDF| value. PTDF entries that are missing, None or NaN are ignored;
    when several neighbors share the highest |PTDF|, the one listed first
    in ZONE_NEIGHBORS wins.

    Args:
        tso: TSO name (e.g., "Apg", "Rte", "Amprion")
        ptdf_dict: Dictionary of PTDF values
            Format: {"ptdf_AT": -0.45, "ptdf_DE": 0.12, ...}

    Returns:
        Border string (zone codes in alphabetical order), or None if the TSO
        is unknown, its zone has no registered neighbors, or no neighbor has
        a usable PTDF value

    Example:
        >>> ptdfs = {"ptdf_AT": -0.45, "ptdf_SI": 0.38, "ptdf_DE": 0.12}
        >>> infer_border_from_tso_and_ptdf("Apg", ptdfs)
        'AT_SI'

        Apg maps to AT, and SI has the highest |PTDF| among AT's neighbors.
    """
    home_zone = TSO_TO_ZONE.get(tso)
    if not home_zone:
        return None

    neighbors = ZONE_NEIGHBORS.get(home_zone, [])
    if not neighbors:
        return None

    # Collect |PTDF| for every neighbor that has a usable value
    neighbor_ptdfs = {}
    for neighbor in neighbors:
        value = ptdf_dict.get(f'ptdf_{neighbor}')
        # Skip missing data instead of failing or mis-ranking:
        # abs(None) raises TypeError, and NaN compares False against
        # everything, which would make the max() below depend on the
        # position of the NaN entry rather than on magnitudes.
        if value is None or math.isnan(value):
            continue
        neighbor_ptdfs[neighbor] = abs(value)

    if not neighbor_ptdfs:
        return None

    # Get neighbor with maximum absolute PTDF
    max_neighbor = max(neighbor_ptdfs, key=neighbor_ptdfs.get)

    # Normalize border to alphabetical order
    return f"{min(home_zone, max_neighbor)}_{max(home_zone, max_neighbor)}"
def extract_cnec_border(
    cnec_eic: str,
    tso: str,
    ptdf_dict: Optional[Dict[str, float]] = None
) -> str:
    """
    Resolve the border of a CNEC by trying progressively weaker methods.

    The first method that yields a border wins:
        1. Parse the EIC (10T-XX-YY format) - most reliable
        2. Special case mapping (Alegro, etc.)
        3. TSO + neighbor PTDF analysis - only when ptdf_dict is non-empty
        4. TSO-only guess: the TSO's home zone paired with its
           alphabetically first neighbor (very approximate)
        5. "UNKNOWN" when the TSO cannot be mapped to a zone with neighbors

    Args:
        cnec_eic: CNEC EIC code
        tso: TSO name
        ptdf_dict: Optional dictionary of PTDF values
            Format: {"ptdf_AT": -0.45, "ptdf_BE": 0.12, ...}

    Returns:
        Border string (e.g., "DE_FR", "AT_SI") or "UNKNOWN"

    Examples:
        >>> extract_cnec_border("10T-DE-FR-000068", "Amprion")
        'DE_FR'
        >>> extract_cnec_border("ALEGRO_EXTERNAL_BE_IMPORT", "Elia")
        'BE_DE'
        >>> ptdfs = {"ptdf_AT": -0.45, "ptdf_SI": 0.38}
        >>> extract_cnec_border("17T0000000215642", "Apg", ptdfs)
        'AT_SI'
    """
    # Methods 1 + 2: the identifier itself tells us the border, either by
    # its 10T- pattern or through the special-case table (checked in that
    # order; the table is consulted only if parsing gave nothing).
    from_identifier = extract_border_from_eic(cnec_eic) or get_special_border(cnec_eic)
    if from_identifier:
        return from_identifier

    # Method 3: TSO home zone + strongest neighbor PTDF
    if ptdf_dict:
        from_ptdf = infer_border_from_tso_and_ptdf(tso, ptdf_dict)
        if from_ptdf:
            return from_ptdf

    # Method 4: TSO-only guess. Very approximate, but better than UNKNOWN.
    zone = TSO_TO_ZONE.get(tso)
    candidates = ZONE_NEIGHBORS.get(zone, []) if zone else []
    if not candidates:
        return "UNKNOWN"

    # Alphabetically first neighbor serves as the guess
    guess = min(candidates)
    return f"{min(zone, guess)}_{max(zone, guess)}"
def validate_border_assignment(
    border: str,
    ptdf_dict: Dict[str, float],
    threshold: float = 0.05
) -> bool:
    """
    Validate border assignment using PTDF sanity check.

    For a border XX_YY, at least one of ptdf_XX or ptdf_YY should have
    significant magnitude (|PTDF| > threshold, strictly greater).

    Args:
        border: Assigned border (e.g., "DE_FR")
        ptdf_dict: Dictionary of PTDF values. A zone whose entry is
            missing or None is treated as having a PTDF of 0.0.
        threshold: Minimum |PTDF| to consider significant (default 0.05)

    Returns:
        True if validation passes, False otherwise. "UNKNOWN" and any
        border that does not consist of exactly two non-empty zone codes
        separated by "_" never pass.

    Example:
        >>> validate_border_assignment("DE_FR", {"ptdf_DE": -0.42, "ptdf_FR": 0.38})
        True
        >>> validate_border_assignment("DE_FR", {"ptdf_DE": 0.01, "ptdf_FR": 0.02})
        False
    """
    if border == "UNKNOWN":
        return False

    zones = border.split('_')
    # Exactly two zone codes, both non-empty: a half-empty border such as
    # "DE_" is malformed and must not validate on the strength of one side.
    if len(zones) != 2 or not all(zones):
        return False

    # `or 0.0` maps an explicit None (missing data) to the same 0.0 that an
    # absent key gets, instead of letting abs(None) raise TypeError.
    magnitudes = [abs(ptdf_dict.get(f'ptdf_{zone}') or 0.0) for zone in zones]

    # At least one zone should have significant PTDF
    return any(magnitude > threshold for magnitude in magnitudes)
def get_border_statistics(borders: list) -> Dict[str, int]:
    """
    Get frequency statistics for border assignments.

    Useful for validating that major FBMC borders are well-represented.

    Args:
        borders: List of border assignments

    Returns:
        Dictionary mapping border → count, ordered by count (descending).
        Borders with equal counts keep the order in which they first
        appear in the input.

    Example:
        >>> get_border_statistics(["DE_FR", "AT_SI", "DE_FR", "UNKNOWN"])
        {'DE_FR': 2, 'AT_SI': 1, 'UNKNOWN': 1}
    """
    # Counter.most_common() returns (border, count) pairs sorted by count
    # descending, with ties in first-seen order; dict() preserves that order.
    return dict(Counter(borders).most_common())