Spaces:
Sleeping
Sleeping
| """ | |
| Phase 1E: Diagnose Low CNEC Coverage | |
| ===================================== | |
| Investigates why only 4% CNEC coverage achieved: | |
| 1. Test bidirectional queries (reverse from/to) | |
| 2. Test historical period (more outages than future) | |
| 3. Check EIC code format differences | |
| 4. Validate CNEC list EIC codes | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| import pandas as pd | |
| import polars as pl | |
| from dotenv import load_dotenv | |
| from entsoe import EntsoePandasClient | |
| import time | |
| sys.path.append(str(Path(__file__).parent.parent)) | |
| load_dotenv() | |
| API_KEY = os.getenv('ENTSOE_API_KEY') | |
| client = EntsoePandasClient(api_key=API_KEY) | |
| print("="*80) | |
| print("PHASE 1E: DIAGNOSE LOW CNEC COVERAGE") | |
| print("="*80) | |
| print() | |
| # ============================================================================ | |
| # Investigation 1: Test with HISTORICAL period (more outages) | |
| # ============================================================================ | |
| print("-"*80) | |
| print("INVESTIGATION 1: HISTORICAL vs FUTURE PERIOD") | |
| print("-"*80) | |
| print() | |
| print("Hypothesis: Future period (Sept 2025) has few planned outages") | |
| print("Testing: Historical period (Sept 2024) likely has more outage records") | |
| print() | |
| FBMC_ZONES = { | |
| 'FR': '10YFR-RTE------C', | |
| 'DE_LU': '10Y1001A1001A82H' | |
| } | |
| # Test DE_LU -> FR with historical data | |
| print("Test: DE_LU -> FR (historical Sept 2024)") | |
| try: | |
| response = client._base_request( | |
| params={ | |
| 'documentType': 'A78', | |
| 'in_Domain': FBMC_ZONES['FR'], | |
| 'out_Domain': FBMC_ZONES['DE_LU'] | |
| }, | |
| start=pd.Timestamp('2024-09-01', tz='UTC'), | |
| end=pd.Timestamp('2024-09-30', tz='UTC') | |
| ) | |
| outages_zip = response.content | |
| import zipfile | |
| from io import BytesIO | |
| with zipfile.ZipFile(BytesIO(outages_zip), 'r') as zf: | |
| xml_count = len([f for f in zf.namelist() if f.endswith('.xml')]) | |
| print(f" [OK] Historical period: {xml_count} XML files") | |
| except Exception as e: | |
| print(f" [FAIL] {e}") | |
| print() | |
| # Compare with future period | |
| print("Test: DE_LU -> FR (future Sept 2025)") | |
| try: | |
| response = client._base_request( | |
| params={ | |
| 'documentType': 'A78', | |
| 'in_Domain': FBMC_ZONES['FR'], | |
| 'out_Domain': FBMC_ZONES['DE_LU'] | |
| }, | |
| start=pd.Timestamp('2025-09-01', tz='UTC'), | |
| end=pd.Timestamp('2025-09-30', tz='UTC') | |
| ) | |
| outages_zip = response.content | |
| import zipfile | |
| from io import BytesIO | |
| with zipfile.ZipFile(BytesIO(outages_zip), 'r') as zf: | |
| xml_count = len([f for f in zf.namelist() if f.endswith('.xml')]) | |
| print(f" [OK] Future period: {xml_count} XML files") | |
| except Exception as e: | |
| print(f" [FAIL] {e}") | |
| print() | |
| # ============================================================================ | |
| # Investigation 2: Check EIC Code Format Differences | |
| # ============================================================================ | |
| print("-"*80) | |
| print("INVESTIGATION 2: EIC CODE FORMAT ANALYSIS") | |
| print("-"*80) | |
| print() | |
| # Load CNEC EICs | |
| cnec_file = Path(__file__).parent.parent / 'data' / 'processed' / 'critical_cnecs_all.csv' | |
| cnec_df = pl.read_csv(cnec_file) | |
| print("Sample CNEC EIC codes from JAO data:") | |
| sample_cnecs = cnec_df.select(['cnec_eic', 'cnec_name']).head(10) | |
| for row in sample_cnecs.iter_rows(): | |
| print(f" {row[0]}: {row[1]}") | |
| print() | |
| print("EIC codes extracted from ENTSO-E (Phase 1D):") | |
| entso_e_eics = [ | |
| '11T0-0000-0011-L', | |
| '10T-DE-PL-000039', | |
| '11TD8L553------B', | |
| '10T-BE-FR-000015', | |
| '10T-DE-FR-00005A', | |
| '22T-BE-IN-LI0130', | |
| '10T-CH-DE-000034', | |
| '10T-AT-DE-000061' | |
| ] | |
| for eic in entso_e_eics[:10]: | |
| in_cnec = eic in cnec_df.select('cnec_eic').to_series().to_list() | |
| print(f" {eic}: {'MATCH' if in_cnec else 'NO MATCH'}") | |
| print() | |
| # ============================================================================ | |
| # Investigation 3: Bidirectional Queries | |
| # ============================================================================ | |
| print("-"*80) | |
| print("INVESTIGATION 3: BIDIRECTIONAL QUERIES") | |
| print("-"*80) | |
| print() | |
| print("Hypothesis: Some borders need reverse direction queries") | |
| print("Testing: DE_LU -> BE vs BE -> DE_LU") | |
| print() | |
| FBMC_ZONES['BE'] = '10YBE----------2' | |
| # Forward direction | |
| print("Forward: DE_LU -> BE") | |
| try: | |
| response = client._base_request( | |
| params={ | |
| 'documentType': 'A78', | |
| 'in_Domain': FBMC_ZONES['BE'], | |
| 'out_Domain': FBMC_ZONES['DE_LU'] | |
| }, | |
| start=pd.Timestamp('2024-09-01', tz='UTC'), | |
| end=pd.Timestamp('2024-09-30', tz='UTC') | |
| ) | |
| outages_zip = response.content | |
| import zipfile | |
| from io import BytesIO | |
| with zipfile.ZipFile(BytesIO(outages_zip), 'r') as zf: | |
| xml_count = len([f for f in zf.namelist() if f.endswith('.xml')]) | |
| print(f" [OK] {xml_count} XML files") | |
| except Exception as e: | |
| print(f" [FAIL] {e}") | |
| time.sleep(2.2) | |
| # Reverse direction | |
| print("Reverse: BE -> DE_LU") | |
| try: | |
| response = client._base_request( | |
| params={ | |
| 'documentType': 'A78', | |
| 'in_Domain': FBMC_ZONES['DE_LU'], | |
| 'out_Domain': FBMC_ZONES['BE'] | |
| }, | |
| start=pd.Timestamp('2024-09-01', tz='UTC'), | |
| end=pd.Timestamp('2024-09-30', tz='UTC') | |
| ) | |
| outages_zip = response.content | |
| import zipfile | |
| from io import BytesIO | |
| with zipfile.ZipFile(BytesIO(outages_zip), 'r') as zf: | |
| xml_count = len([f for f in zf.namelist() if f.endswith('.xml')]) | |
| print(f" [OK] {xml_count} XML files") | |
| except Exception as e: | |
| print(f" [FAIL] {e}") | |
| print() | |
| # ============================================================================ | |
| # Investigation 4: CNEC Tier Distribution | |
| # ============================================================================ | |
| print("-"*80) | |
| print("INVESTIGATION 4: CNEC TIER DISTRIBUTION") | |
| print("-"*80) | |
| print() | |
| tier_dist = cnec_df.group_by('tier').agg(pl.count()).sort('tier') | |
| print("CNEC distribution by tier:") | |
| print(tier_dist) | |
| print() | |
| # Check if matched CNECs are from specific tier | |
| matched_eics = [ | |
| '11T0-0000-0011-L', | |
| '10T-DE-PL-000039', | |
| '11TD8L553------B', | |
| '10T-BE-FR-000015', | |
| '10T-DE-FR-00005A', | |
| '22T-BE-IN-LI0130', | |
| '10T-CH-DE-000034', | |
| '10T-AT-DE-000061' | |
| ] | |
| print("Matched CNECs by tier:") | |
| for eic in matched_eics: | |
| matched = cnec_df.filter(pl.col('cnec_eic') == eic) | |
| if len(matched) > 0: | |
| tier = matched.select('tier').item(0, 0) | |
| name = matched.select('cnec_name').item(0, 0) | |
| print(f" Tier-{tier}: {eic} ({name})") | |
| print() | |
| # ============================================================================ | |
| # SUMMARY | |
| # ============================================================================ | |
| print("="*80) | |
| print("DIAGNOSTIC SUMMARY") | |
| print("="*80) | |
| print() | |
| print("Possible reasons for low coverage:") | |
| print(" 1. Future period (Sept 2025) has fewer outages than historical") | |
| print(" 2. EIC code format differences between JAO and ENTSO-E") | |
| print(" 3. Bidirectional queries needed for some borders") | |
| print(" 4. CNEC list includes internal lines not in transmission outages") | |
| print(" 5. 200 CNECs may be aggregated identifiers, not individual assets") | |
| print() | |
| print("Recommendations:") | |
| print(" 1. Use historical period (last 24 months) for better coverage") | |
| print(" 2. Query both directions for each border") | |
| print(" 3. Investigate EIC mapping between JAO and ENTSO-E") | |
| print(" 4. Consider using ALL extracted EICs as features (63 total)") | |
| print(" 5. Alternative: Use border-level outages (20 features)") | |
| print() | |