Spaces:
Sleeping
Sleeping
| """ | |
| Phase 1 ENTSO-E API Testing Script | |
| =================================== | |
| Tests critical implementation details: | |
| 1. Pumped storage query method (Scenario A/B/C) | |
| 2. Transmission outages (planned A53 vs unplanned A54) | |
| 3. Forward-looking outage queries (TODAY -> +14 days) | |
| 4. CNEC EIC filtering match rate | |
| Run this before implementing full collection script. | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from datetime import datetime, timedelta | |
| import pandas as pd | |
| import polars as pl | |
| from dotenv import load_dotenv | |
| from entsoe import EntsoePandasClient | |
# Make the directory two levels above this script importable.
sys.path.append(str(Path(__file__).parent.parent))

# Load variables from a .env file into the environment, then read the API key.
load_dotenv()
API_KEY = os.environ.get('ENTSOE_API_KEY')
if not API_KEY:
    # Fail fast: every test below needs an authenticated client.
    raise ValueError("ENTSOE_API_KEY not found in .env file")

# Single pandas-flavoured client shared by all tests in this script.
client = EntsoePandasClient(api_key=API_KEY)

# Opening banner: rule, title, rule, blank line.
print("=" * 80, "PHASE 1 ENTSO-E API TESTING", "=" * 80, "", sep="\n")
# ============================================================================
# TEST 1: Pumped Storage Query Method
# ============================================================================
# Fetches one week of PSR type B10 generation for CH and classifies the shape
# of the answer: a Series with negatives (Scenario A), a Series without
# negatives (Scenario B), or a DataFrame with several columns (Scenario C).
print("-" * 80, "TEST 1: PUMPED STORAGE QUERY METHOD", "-" * 80, "", sep="\n")
print(
    "Testing query_generation() with PSR type B10 (Hydro Pumped Storage)",
    "Zone: Switzerland (CH) - largest pumped storage in Europe",
    "Period: 2025-09-23 to 2025-09-30 (1 week)",
    "",
    sep="\n",
)

try:
    window_start = pd.Timestamp('2025-09-23', tz='UTC')
    window_end = pd.Timestamp('2025-09-30', tz='UTC')
    test_pumped = client.query_generation(
        country_code='CH',
        start=window_start,
        end=window_end,
        psr_type='B10',  # Hydro Pumped Storage
    )

    print("[OK] Query successful!")
    print(f" Data type: {type(test_pumped)}")
    print(f" Shape: {test_pumped.shape}")
    # A Series has no .columns attribute, so fall back to a fixed label.
    if hasattr(test_pumped, 'columns'):
        column_summary = test_pumped.columns.tolist()
    else:
        column_summary = 'N/A (Series)'
    print(f" Columns: {column_summary}")
    print()

    # Analyze values
    if isinstance(test_pumped, pd.Series):
        print(" Data is a Series (single column)")
        print(f" Min value: {test_pumped.min():.2f} MW")
        print(f" Max value: {test_pumped.max():.2f} MW")
        print(f" Mean value: {test_pumped.mean():.2f} MW")
        print()

        # Negative readings would indicate a net balance rather than gross output.
        negative_count = (test_pumped < 0).sum()
        sample_count = len(test_pumped)
        negative_share = negative_count / sample_count * 100
        print(f" Negative values: {negative_count} / {sample_count} ({negative_share:.1f}%)")

        if not negative_count > 0:
            print("\n >> SCENARIO B: Returns GENERATION ONLY (always positive)")
            print(" >> Need to find separate method for pumping consumption")
        else:
            print("\n >> SCENARIO A: Returns NET BALANCE (generation - pumping)")
            print(" >> Need to derive gross generation and consumption separately")
            print(" >> OR query twice with different parameters")
    elif isinstance(test_pumped, pd.DataFrame):
        print(" Data is a DataFrame (multiple columns)")
        print(f" Columns: {test_pumped.columns.tolist()}")
        print()

        for col in test_pumped.columns:
            print(f" Column '{col}':")
            column_values = test_pumped[col]
            print(f" Min: {column_values.min():.2f} MW")
            print(f" Max: {column_values.max():.2f} MW")
            print(f" Negative values: {(column_values < 0).sum()}")

        print("\n >> SCENARIO C: Returns MULTIPLE COLUMNS")
        print(" >> Check if separate generation/consumption/net columns exist")

    # Show the first 48 rows regardless of which scenario matched.
    print("\n Sample values (first 48 hours):")
    print(test_pumped.head(48))
except Exception as e:
    # Report and carry on so the remaining tests still run.
    print(f"[FAIL] Query failed: {e}")
    print(" >> Cannot determine pumped storage query method")
print()
# ============================================================================
# TEST 2: Transmission Outages - Planned vs Unplanned
# ============================================================================
# Queries one week of DE_LU -> FR transmission unavailability and reports
# whether the result distinguishes planned (A53) from unplanned (A54) outages.
# The result stays in `test_outages`, which the CNEC filtering test reads.
print("-"*80)
print("TEST 2: TRANSMISSION OUTAGES - PLANNED (A53) vs UNPLANNED (A54)")
print("-"*80)
print()
print("Testing query_unavailability_transmission()")
print("Border: Germany/Luxembourg (DE_LU) -> France (FR)")
print("Period: 2025-09-23 to 2025-09-30 (1 week)")
print()

try:
    test_outages = client.query_unavailability_transmission(
        country_code_from='10Y1001A1001A82H',  # DE_LU
        country_code_to='10YFR-RTE------C',  # FR
        start=pd.Timestamp('2025-09-23', tz='UTC'),
        end=pd.Timestamp('2025-09-30', tz='UTC')
    )
    print("[OK] Query successful!")
    print(f" Records returned: {len(test_outages)}")
    print(f" Columns: {test_outages.columns.tolist()}")
    print()

    # Resolve the business-type column without depending on its capitalisation.
    # NOTE(review): entsoe-py appears to expose this column as lowercase
    # 'businesstype' - confirm against the installed version. An exact
    # 'businessType' match still resolves as before.
    business_col = next(
        (col for col in test_outages.columns
         if str(col).lower() == 'businesstype'),
        None,
    )

    if business_col is not None:
        print(" [OK] businessType column found!")
        print("\n Business types distribution:")
        business_counts = test_outages[business_col].value_counts()
        print(business_counts)
        print()

        # Accept both the raw ENTSO-E codes and their descriptive labels.
        # NOTE(review): the labels assume entsoe-py maps A53/A54 to
        # 'Planned maintenance' / 'Unplanned outage' - confirm; if it does
        # not, the label entries simply never match.
        observed_types = set(business_counts.index)
        has_a53 = bool(observed_types & {'A53', 'Planned maintenance'})
        has_a54 = bool(observed_types & {'A54', 'Unplanned outage'})

        if has_a53 and has_a54:
            print(" [OK] BOTH A53 (Planned) and A54 (Unplanned) present!")
            print(" >> Can use standard client for all outages")
        elif has_a53:
            print(" [OK] A53 (Planned) found, but no A54 (Unplanned)")
            print(" >> Standard client returns only planned outages")
        elif has_a54:
            print(" [FAIL] Only A54 (Unplanned) found - NO PLANNED OUTAGES (A53)")
            print(" >> CRITICAL: Need EntsoeRawClient workaround for planned outages!")
        else:
            print(" [WARN] Unknown business types")
            print(" >> Manual investigation required")
    else:
        print(" [FAIL] businessType column NOT found!")
        print(" >> Cannot determine if planned outages are included")
        print(" >> May need EntsoeRawClient to access businessType parameter")

    # Show sample outages. Only columns that actually exist are selected, so a
    # missing display column is not reported as a failed query by the handler
    # below.
    print("\n Sample outage records:")
    wanted_cols = ['start', 'end', 'unavailability_reason']
    if business_col is not None:
        wanted_cols.append(business_col)
    display_cols = [col for col in wanted_cols if col in test_outages.columns]
    print(test_outages[display_cols].head(10))
except Exception as e:
    # Report and carry on so the remaining tests still run.
    print(f"[FAIL] Query failed: {e}")
    print(" >> Cannot test transmission outages")
print()
# ============================================================================
# TEST 3: Forward-Looking Outage Queries
# ============================================================================
# Queries DE_LU -> FR transmission unavailability from now until 14 days ahead
# to check whether the API returns outages that lie in the future.
print("-"*80)
print("TEST 3: FORWARD-LOOKING OUTAGE QUERIES (TODAY -> +14 DAYS)")
print("-"*80)
print()

# Take "now" directly in UTC. A naive datetime.now() is local wall-clock time;
# labelling it as UTC would shift the query window by the machine's UTC offset.
today = pd.Timestamp.now(tz='UTC')
future_end = today + timedelta(days=14)

print("Testing forward-looking transmission outages")
print("Border: Germany/Luxembourg (DE_LU) -> France (FR)")
print(f"Period: {today.strftime('%Y-%m-%d')} to {future_end.strftime('%Y-%m-%d')}")
print()

try:
    future_outages = client.query_unavailability_transmission(
        country_code_from='10Y1001A1001A82H',  # DE_LU
        country_code_to='10YFR-RTE------C',  # FR
        start=today,  # already timezone-aware, so passed through unchanged
        end=future_end
    )
    print("[OK] Forward-looking query successful!")
    print(f" Future outages found: {len(future_outages)}")

    if len(future_outages) > 0:
        print(f" Date range: {future_outages['start'].min()} to {future_outages['end'].max()}")
        print("\n Sample future outages:")

        # Resolve the business-type column without depending on its capitalisation.
        # NOTE(review): entsoe-py appears to name it 'businesstype' - confirm.
        business_col = next(
            (col for col in future_outages.columns
             if str(col).lower() == 'businesstype'),
            None,
        )
        wanted_cols = ['start', 'end']
        if business_col is not None:
            wanted_cols.append(business_col)
        wanted_cols.append('unavailability_reason')
        # Show only columns that exist in the result.
        display_cols = [col for col in wanted_cols if col in future_outages.columns]
        print(future_outages[display_cols].head())
    else:
        print(" >> No future outages found (may be normal if no planned maintenance)")
except Exception as e:
    # NOTE(review): the client may signal "no data" by raising instead of
    # returning an empty frame; that case would also land here - confirm.
    print(f"[FAIL] Forward-looking query failed: {e}")
    print(" >> Cannot query future outages")
print()
# ============================================================================
# TEST 4: CNEC EIC Filtering
# ============================================================================
# Loads the critical CNEC EIC list (combined CSV, or tier1 + tier2 as a
# fallback) and measures how many Test 2 outage rows carry one of those codes.
print("-" * 80, "TEST 4: CNEC EIC FILTERING MATCH RATE", "-" * 80, "", sep="\n")
print("Loading 208 critical CNEC EIC codes...")

try:
    # Load CNEC EIC codes
    processed_dir = Path(__file__).parent.parent / 'data' / 'processed'
    cnec_file = processed_dir / 'critical_cnecs_all.csv'

    if cnec_file.exists():
        cnec_df = pl.read_csv(cnec_file)
        print(" [OK] Loaded from combined file")
    else:
        print(f" [WARN] File not found: {cnec_file}")
        print(" >> Trying separate tier files...")
        tier1_file = processed_dir / 'critical_cnecs_tier1.csv'
        tier2_file = processed_dir / 'critical_cnecs_tier2.csv'
        # Both tier files are required; either one missing aborts the test.
        if not (tier1_file.exists() and tier2_file.exists()):
            raise FileNotFoundError("CNEC files not found")
        cnec_df = pl.concat([pl.read_csv(tier1_file), pl.read_csv(tier2_file)])
        print(" [OK] Loaded from separate tier files")

    eic_series = cnec_df.select('cnec_eic').to_series()
    cnec_eics = eic_series.to_list()
    print(f" CNEC EICs loaded: {len(cnec_eics)}")
    print()

    # Filter test outages from Test 2 (absent if that test's query failed).
    if 'test_outages' not in locals() or len(test_outages) == 0:
        print(" >> No outage data from Test 2 to filter")
        print(" >> Run Test 2 successfully first")
    else:
        total_outages = len(test_outages)
        print(f" Filtering {total_outages} outages to CNEC EICs...")

        # First column whose name mentions "eic" or "mrid", case-insensitively.
        eic_column = next(
            (name for name in test_outages.columns
             if 'eic' in name.lower() or 'mrid' in name.lower()),
            None,
        )

        if not eic_column:
            print(" [FAIL] Could not identify EIC column in outage data")
            print(f" >> Available columns: {test_outages.columns.tolist()}")
        else:
            print(f" Using column: {eic_column}")
            is_cnec = test_outages[eic_column].isin(cnec_eics)
            filtered = test_outages[is_cnec]
            # total_outages is non-zero in this branch, so the division is safe.
            match_rate = len(filtered) / total_outages * 100

            print("\n Results:")
            print(f" Total outages: {total_outages}")
            print(f" Matching CNECs: {len(filtered)}")
            print(f" Match rate: {match_rate:.1f}%")

            if match_rate > 0:
                print("\n [OK] CNEC filtering works!")
                print(" >> Expected match rate: 5-15% (most outages are non-critical lines)")
            else:
                print("\n [FAIL] No matches found")
                print(" >> May need to verify CNEC EIC codes or outage data structure")
except Exception as e:
    # Covers missing files, a missing 'cnec_eic' column, and filtering errors.
    print(f"[FAIL] CNEC filtering test failed: {e}")
print()
# ============================================================================
# SUMMARY & RECOMMENDATIONS
# ============================================================================
# Static checklist mapping each test outcome to the follow-up action.
# One entry per output line; empty strings are blank separator lines.
summary_lines = (
    "=" * 80,
    "PHASE 1 TESTING SUMMARY",
    "=" * 80,
    "",
    "Review the test results above to determine:",
    "",
    "1. PUMPED STORAGE:",
    " - Scenario A: Implement separate gross generation/consumption extraction",
    " - Scenario B: Find alternative method for pumping consumption",
    " - Scenario C: Extract all columns directly",
    "",
    "2. TRANSMISSION OUTAGES:",
    " - If A53 present: Use standard client [OK]",
    " - If only A54: Implement EntsoeRawClient for planned outages [FAIL]",
    "",
    "3. FORWARD-LOOKING:",
    " - If successful: Can query future outages [OK]",
    " - If failed: Need alternative approach [FAIL]",
    "",
    "4. CNEC FILTERING:",
    " - If match rate 5-15%: Expected behavior [OK]",
    " - If 0%: Verify CNEC EIC codes or data structure [FAIL]",
    "",
    "=" * 80,
    "Next: Implement collection script based on test results",
    "=" * 80,
)
print("\n".join(summary_lines))