zhiminy committed on
Commit c0eda81 · 1 Parent(s): 3d31827
Files changed (2)
  1. app.py +268 -238
  2. msr.py +243 -499
app.py CHANGED
@@ -25,32 +25,15 @@ load_dotenv()
25
 
26
  # Parse command-line arguments
27
  parser = argparse.ArgumentParser(description='SWE Agent Review Leaderboard')
28
- parser.add_argument('--debug', '--DEBUG', action='store_true',
29
- help='Enable debug mode (limits review retrieval to 10 per query pattern)')
30
- parser.add_argument('--no-debug', '--production', action='store_true',
31
- help='Explicitly disable debug mode (force production mode)')
32
  args = parser.parse_args()
33
 
34
  # =============================================================================
35
  # CONFIGURATION
36
  # =============================================================================
37
 
38
- # DEBUG MODE: Set to True to limit review retrieval for testing
39
- # When enabled, only fetches up to 10 reviews per query pattern per agent
40
- # Priority: 1) Command-line args, 2) Environment variable, 3) Default (False)
41
- if args.no_debug:
42
- DEBUG_MODE = False
43
- elif args.debug:
44
- DEBUG_MODE = True
45
- else:
46
- DEBUG_MODE = os.getenv('DEBUG_MODE', 'False').lower() in ('true', '1', 'yes')
47
-
48
- # In-memory cache for debug mode (data persists during session but NOT saved to HF)
49
- DEBUG_REVIEW_METADATA_CACHE = defaultdict(list)
50
-
51
  AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
52
  REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
53
- LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard (past 6 months)
54
 
55
  LEADERBOARD_COLUMNS = [
56
  ("Agent Name", "string"),
@@ -191,7 +174,7 @@ def fetch_reviews_from_bigquery(client, identifier, start_date, end_date):
191
  SELECT
192
  repo.name as repo_name,
193
  actor.login as actor_login,
194
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as pr_url,
195
  CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number,
196
  JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at') as reviewed_at,
197
  created_at
@@ -222,7 +205,7 @@ def fetch_reviews_from_bigquery(client, identifier, start_date, end_date):
222
  return []
223
 
224
 
225
- def fetch_pr_status_from_bigquery(client, pr_urls, start_date, end_date):
226
  """
227
  Fetch PR status (merged/closed) from GitHub Archive PullRequestEvent.
228
 
@@ -231,29 +214,29 @@ def fetch_pr_status_from_bigquery(client, pr_urls, start_date, end_date):
231
 
232
  Args:
233
  client: BigQuery client instance
234
- pr_urls: List of PR URLs to check status for
235
  start_date: Start datetime (should cover review period and after)
236
  end_date: End datetime (should be recent/current)
237
 
238
  Returns:
239
  Dictionary mapping PR URL to status dict:
240
  {
241
- 'pr_url': {
242
  'status': 'merged'|'closed'|'open',
243
  'merged': bool,
244
  'closed_at': timestamp or None
245
  }
246
  }
247
  """
248
- if not pr_urls:
249
  return {}
250
 
251
- print(f"\nπŸ” Querying BigQuery for PR status ({len(pr_urls)} PRs)...")
252
 
253
  # Extract repo and PR number from URLs
254
  # URL format: https://github.com/owner/repo/pull/123
255
  pr_info = []
256
- for url in pr_urls:
257
  try:
258
  parts = url.replace('https://github.com/', '').split('/')
259
  if len(parts) >= 4:
@@ -305,7 +288,7 @@ def fetch_pr_status_from_bigquery(client, pr_urls, start_date, end_date):
305
  SELECT
306
  repo.name as repo_name,
307
  CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number,
308
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as pr_url,
309
  JSON_EXTRACT_SCALAR(payload, '$.action') as action,
310
  CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) as merged,
311
  JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
@@ -331,7 +314,7 @@ def fetch_pr_status_from_bigquery(client, pr_urls, start_date, end_date):
331
  # Build status map by PR URL
332
  status_map = {}
333
  for row in results:
334
- pr_url = row.pr_url
335
 
336
  merged = row.merged if row.merged is not None else False
337
  closed_at = row.closed_at or row.merged_at
@@ -342,14 +325,14 @@ def fetch_pr_status_from_bigquery(client, pr_urls, start_date, end_date):
342
 
343
  status = 'merged' if merged else 'closed'
344
 
345
- status_map[pr_url] = {
346
  'status': status,
347
  'merged': merged,
348
  'closed_at': closed_at
349
  }
350
 
351
  # Mark remaining PRs as open
352
- for url in pr_urls:
353
  if url not in status_map:
354
  status_map[url] = {
355
  'status': 'open',
@@ -368,7 +351,7 @@ def fetch_pr_status_from_bigquery(client, pr_urls, start_date, end_date):
368
  except Exception as e:
369
  print(f" βœ— BigQuery error: {str(e)}")
370
  # Return all as open on error
371
- return {url: {'status': 'open', 'merged': False, 'closed_at': None} for url in pr_urls}
372
 
373
 
374
  def extract_review_metadata_from_bigquery(review_row, status_info):
@@ -382,7 +365,7 @@ def extract_review_metadata_from_bigquery(review_row, status_info):
382
  Returns:
383
  Dictionary with review metadata
384
  """
385
- pr_url = review_row.pr_url
386
  pr_number = review_row.pr_number
387
  reviewed_at = review_row.reviewed_at or review_row.created_at
388
 
@@ -391,12 +374,12 @@ def extract_review_metadata_from_bigquery(review_row, status_info):
391
  reviewed_at = reviewed_at.isoformat()
392
 
393
  return {
394
- 'html_url': pr_url,
395
  'reviewed_at': reviewed_at,
396
  'pr_status': status_info['status'],
397
- 'pr_merged': status_info['merged'],
398
- 'pr_closed_at': status_info['closed_at'],
399
- 'pr_url': pr_url,
400
  'review_id': f"pr_{pr_number}"
401
  }
402
 
@@ -703,14 +686,13 @@ def validate_github_username(identifier):
703
  return False, f"Validation error: {str(e)}"
704
 
705
 
706
- def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_pool, prs_by_url, debug_limit=None, depth=0):
707
  """
708
  Fetch reviews within a specific time range using time-based partitioning.
709
  Recursively splits the time range if hitting the 1000-result limit.
710
  Supports splitting by day, hour, minute, and second as needed.
711
 
712
  Args:
713
- debug_limit: If set, stops fetching after this many NEW reviews total across all partitions (for testing)
714
  depth: Current recursion depth (for tracking)
715
 
716
  Returns the number of reviews found in this time partition.
@@ -748,10 +730,6 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
748
  total_in_partition = 0
749
 
750
  while True:
751
- # Check debug limit GLOBALLY (total unique PRs across all partitions)
752
- if debug_limit is not None and len(prs_by_url) >= debug_limit:
753
- print(f"{indent} πŸ› DEBUG MODE: Reached global limit of {debug_limit} PRs, stopping...")
754
- return total_in_partition
755
  url = 'https://api.github.com/search/issues' # Use issues endpoint for PR search
756
  params = {
757
  'q': query,
@@ -782,11 +760,11 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
782
 
783
  # Add PR reviews to global dict (keyed by PR URL)
784
  for pr in items:
785
- pr_url = pr.get('html_url')
786
  pr_number = pr.get('number')
787
  # Use PR URL as unique key (more reliable than number alone)
788
- if pr_url and pr_url not in prs_by_url:
789
- prs_by_url[pr_url] = pr
790
  total_in_partition += 1
791
 
792
  # Check if we hit the 1000-result limit
@@ -813,7 +791,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
813
  split_start = split_start + timedelta(seconds=1)
814
 
815
  count = fetch_reviews_with_time_partition(
816
- base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
817
  )
818
  total_from_splits += count
819
 
@@ -834,7 +812,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
834
  split_start = split_start + timedelta(minutes=1)
835
 
836
  count = fetch_reviews_with_time_partition(
837
- base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
838
  )
839
  total_from_splits += count
840
 
@@ -855,7 +833,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
855
  split_start = split_start + timedelta(hours=1)
856
 
857
  count = fetch_reviews_with_time_partition(
858
- base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
859
  )
860
  total_from_splits += count
861
 
@@ -886,7 +864,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
886
  split_start = split_start + timedelta(days=1)
887
 
888
  count = fetch_reviews_with_time_partition(
889
- base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
890
  )
891
  total_from_splits += count
892
 
@@ -897,10 +875,10 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
897
 
898
  # Recursively fetch both halves
899
  count1 = fetch_reviews_with_time_partition(
900
- base_query, start_date, mid_date, token_pool, prs_by_url, debug_limit, depth + 1
901
  )
902
  count2 = fetch_reviews_with_time_partition(
903
- base_query, mid_date + timedelta(days=1), end_date, token_pool, prs_by_url, debug_limit, depth + 1
904
  )
905
 
906
  return count1 + count2
@@ -922,7 +900,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_po
922
  return total_in_partition
923
 
924
 
925
- def fetch_reviews_parallel(query_patterns, start_date, end_date, token_pool, prs_by_url, debug_limit=None):
926
  """
927
  Fetch reviews for multiple query patterns in parallel using available parallel tokens.
928
 
@@ -936,7 +914,6 @@ def fetch_reviews_parallel(query_patterns, start_date, end_date, token_pool, prs
936
  end_date: End datetime for time range
937
  token_pool: TokenPool instance for token management
938
  prs_by_url: Dictionary to collect PRs by URL (shared across patterns)
939
- debug_limit: Optional limit on total PRs to fetch (for testing)
940
 
941
  Returns:
942
  Total number of PRs found across all patterns
@@ -954,7 +931,7 @@ def fetch_reviews_parallel(query_patterns, start_date, end_date, token_pool, prs
954
  for pattern in query_patterns:
955
  pattern_prs = {}
956
  count = fetch_reviews_with_time_partition(
957
- pattern, start_date, end_date, token_pool, pattern_prs, debug_limit, depth=0
958
  )
959
  # Merge pattern results into global dict
960
  with threading.Lock():
@@ -975,7 +952,7 @@ def fetch_reviews_parallel(query_patterns, start_date, end_date, token_pool, prs
975
  pattern_prs = {}
976
  try:
977
  count = fetch_reviews_with_time_partition(
978
- pattern, start_date, end_date, token_pool, pattern_prs, debug_limit, depth=0
979
  )
980
  return pattern, pattern_prs, count
981
  except Exception as e:
@@ -1017,20 +994,20 @@ def fetch_reviews_parallel(query_patterns, start_date, end_date, token_pool, prs
1017
  def extract_review_metadata(pr):
1018
  """
1019
  Extract minimal PR review metadata for efficient storage.
1020
- Only keeps essential fields: html_url, reviewed_at, pr_status, pr_merged, pr_closed_at.
1021
  Note: agent_name is not stored as it's inferred from the folder structure.
1022
 
1023
  PR status:
1024
  - pr_status: 'open', 'merged', or 'closed'
1025
- - pr_merged: True if PR was merged, False otherwise
1026
- - pr_closed_at: Date when PR was closed/merged (if applicable)
1027
 
1028
  Merged PR = PR that was merged after agent review
1029
  Rejected PR = PR that was closed without merging after agent review
1030
  """
1031
  # Extract PR metadata from search results
1032
  # The GitHub search API returns PR data from /search/issues endpoint
1033
- pr_url = pr.get('html_url')
1034
  pr_number = pr.get('number')
1035
  created_at = pr.get('created_at')
1036
  closed_at = pr.get('closed_at')
@@ -1041,10 +1018,10 @@ def extract_review_metadata(pr):
1041
 
1042
  # For initial extraction, we don't know if merged yet
1043
  # This will be updated by update_pr_status function
1044
- pr_merged = pull_request_data.get('merged_at') is not None if pull_request_data else False
1045
 
1046
  # Determine initial status
1047
- if pr_merged:
1048
  status = 'merged'
1049
  elif state == 'closed':
1050
  status = 'closed'
@@ -1052,12 +1029,11 @@ def extract_review_metadata(pr):
1052
  status = 'open'
1053
 
1054
  return {
1055
- 'html_url': pr_url,
1056
  'reviewed_at': created_at, # When the PR was created (agent reviewed it)
1057
  'pr_status': status,
1058
- 'pr_merged': pr_merged,
1059
- 'pr_closed_at': closed_at,
1060
- 'pr_url': pr_url, # Store PR URL for tracking
1061
  'review_id': f"pr_{pr_number}" # Use PR number for deduplication
1062
  }
1063
 
@@ -1069,8 +1045,6 @@ def update_pr_status(metadata_list, token_pool):
1069
  For each PR associated with a review, fetch current status from GitHub API.
1070
  Updates metadata_list in-place with PR status information.
1071
 
1072
- In DEBUG MODE: Skips status updates to avoid API rate limits.
1073
-
1074
  Args:
1075
  metadata_list: List of review metadata dictionaries
1076
  token_pool: TokenPool instance for rotating tokens
@@ -1081,32 +1055,27 @@ def update_pr_status(metadata_list, token_pool):
1081
  if not metadata_list:
1082
  return metadata_list
1083
 
1084
- # In debug mode, skip status updates to avoid excessive API calls
1085
- if DEBUG_MODE:
1086
- print(f" πŸ› DEBUG MODE: Skipping PR status updates for {len(metadata_list)} reviews")
1087
- return metadata_list
1088
-
1089
  # Track unique PRs to avoid duplicate API calls
1090
- pr_url_to_status = {}
1091
  updated_count = 0
1092
 
1093
  for metadata in metadata_list:
1094
- pr_url = metadata.get('pr_url')
1095
- if not pr_url:
1096
  continue
1097
 
1098
  # Skip if already fetched for this PR
1099
- if pr_url in pr_url_to_status:
1100
- status_info = pr_url_to_status[pr_url]
1101
  metadata['pr_status'] = status_info['status']
1102
- metadata['pr_merged'] = status_info['merged']
1103
- metadata['pr_closed_at'] = status_info['closed_at']
1104
  continue
1105
 
1106
  try:
1107
  # Convert HTML URL to API URL
1108
  # https://github.com/owner/repo/pull/123 -> https://api.github.com/repos/owner/repo/pulls/123
1109
- parts = pr_url.replace('https://github.com/', '').split('/')
1110
  if len(parts) >= 4:
1111
  owner, repo, pull_word, pr_number = parts[0], parts[1], parts[2], parts[3]
1112
  api_url = f'https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}'
@@ -1137,17 +1106,17 @@ def update_pr_status(metadata_list, token_pool):
1137
  }
1138
 
1139
  # Cache and update
1140
- pr_url_to_status[pr_url] = status_info
1141
  metadata['pr_status'] = status
1142
- metadata['pr_merged'] = merged
1143
- metadata['pr_closed_at'] = closed_at or merged_at
1144
  updated_count += 1
1145
 
1146
  # Small delay to avoid rate limiting
1147
  time.sleep(0.1)
1148
 
1149
  except Exception as e:
1150
- print(f" Warning: Could not check PR status for {pr_url}: {e}")
1151
  continue
1152
 
1153
  if updated_count > 0:
@@ -1158,33 +1127,57 @@ def update_pr_status(metadata_list, token_pool):
1158
 
1159
 
1160
 
1161
  def calculate_review_stats_from_metadata(metadata_list):
1162
  """
1163
  Calculate statistics from a list of review metadata (lightweight objects).
1164
- Works with minimal metadata: html_url, reviewed_at, pr_status, pr_merged, pr_closed_at.
1165
 
1166
  Returns a dictionary with comprehensive review metrics.
1167
 
1168
  Acceptance Rate is calculated as:
1169
  merged PRs / (merged PRs + rejected PRs) * 100
1170
 
1171
- Merged PRs = PRs that were merged (pr_status='merged')
1172
- Rejected PRs = PRs that were closed without merging (pr_status='closed')
1173
- Pending PRs = PRs still open (pr_status='open') - excluded from acceptance rate
1174
  """
1175
  total_reviews = len(metadata_list)
1176
 
1177
- # Count merged PRs (merged)
1178
  merged_prs = sum(1 for review_meta in metadata_list
1179
- if review_meta.get('pr_status') == 'merged')
1180
 
1181
  # Count rejected PRs (closed without merging)
1182
  rejected_prs = sum(1 for review_meta in metadata_list
1183
- if review_meta.get('pr_status') == 'closed')
1184
 
1185
  # Count pending PRs (still open)
1186
  pending_prs = sum(1 for review_meta in metadata_list
1187
- if review_meta.get('pr_status') == 'open')
1188
 
1189
  # Calculate acceptance rate (exclude pending PRs)
1190
  completed_prs = merged_prs + rejected_prs
@@ -1198,11 +1191,15 @@ def calculate_review_stats_from_metadata(metadata_list):
1198
  }
1199
 
1200
 
1201
- def calculate_monthly_metrics_by_agent():
1202
  """
1203
- Calculate monthly metrics for all agents for visualization.
1204
  Loads data directly from SWE-Arena/review_metadata dataset.
1205
 
 
 
 
 
1206
  Returns:
1207
  dict: {
1208
  'agents': list of agent names,
@@ -1220,7 +1217,7 @@ def calculate_monthly_metrics_by_agent():
1220
  agents = load_agents_from_hf()
1221
 
1222
  # Create mapping from agent_identifier to agent_name
1223
- identifier_to_name = {agent.get('github_identifier'): agent.get('agent_name') for agent in agents if agent.get('github_identifier')}
1224
 
1225
  # Load all review metadata from review_metadata dataset
1226
  all_metadata = load_review_metadata()
@@ -1290,8 +1287,25 @@ def calculate_monthly_metrics_by_agent():
1290
  'merged_prs': merged_prs_list,
1291
  }
1292
 
1293
  return {
1294
- 'agents': sorted(list(agent_month_data.keys())),
1295
  'months': months,
1296
  'data': result_data
1297
  }
@@ -1327,7 +1341,6 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
1327
  """
1328
  Save review metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
1329
  Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's reviews.
1330
- In debug mode, saves to in-memory cache only.
1331
 
1332
  This function APPENDS new metadata and DEDUPLICATES by review_id.
1333
  Uses batch upload to avoid rate limit (uploads entire folder in single commit).
@@ -1339,17 +1352,6 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
1339
  import tempfile
1340
  import shutil
1341
 
1342
- # Skip saving to HF in debug mode - use in-memory cache instead
1343
- if DEBUG_MODE:
1344
- global DEBUG_REVIEW_METADATA_CACHE
1345
- # Merge with existing cache, deduplicating by review_id
1346
- existing = {review['review_id']: review for review in DEBUG_REVIEW_METADATA_CACHE[agent_identifier] if review.get('review_id')}
1347
- new = {review['review_id']: review for review in metadata_list if review.get('review_id')}
1348
- existing.update(new)
1349
- DEBUG_REVIEW_METADATA_CACHE[agent_identifier] = list(existing.values())
1350
- print(f"πŸ› DEBUG MODE: Saved to in-memory cache only ({len(metadata_list)} reviews) - NOT saved to HuggingFace")
1351
- return True
1352
-
1353
  try:
1354
  token = get_hf_token()
1355
  if not token:
@@ -1428,8 +1430,6 @@ def load_review_metadata():
1428
  """
1429
  Load review metadata from the last LEADERBOARD_TIME_FRAME_DAYS.
1430
 
1431
- In debug mode, loads from in-memory cache if available and filters by time frame.
1432
-
1433
  Structure: [agent_identifier]/YYYY.MM.DD.jsonl
1434
 
1435
  Returns:
@@ -1440,28 +1440,6 @@ def load_review_metadata():
1440
  current_time = datetime.now(timezone.utc)
1441
  cutoff_date = current_time - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
1442
 
1443
- # In debug mode, check in-memory cache first
1444
- if DEBUG_MODE and DEBUG_REVIEW_METADATA_CACHE:
1445
- all_metadata = []
1446
- for agent_identifier, metadata_list in DEBUG_REVIEW_METADATA_CACHE.items():
1447
- for review_meta in metadata_list:
1448
- # Filter by time frame
1449
- reviewed_at = review_meta.get('reviewed_at')
1450
- if reviewed_at:
1451
- try:
1452
- dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
1453
- if dt < cutoff_date:
1454
- continue # Skip reviews older than time frame
1455
- except Exception:
1456
- pass # Keep reviews with unparseable dates
1457
-
1458
- review_with_agent = review_meta.copy()
1459
- review_with_agent['agent_identifier'] = agent_identifier
1460
- all_metadata.append(review_with_agent)
1461
- if all_metadata:
1462
- print(f"πŸ› DEBUG MODE: Loading review metadata from in-memory cache (last {LEADERBOARD_TIME_FRAME_DAYS} days, {len(all_metadata)} reviews)")
1463
- return all_metadata
1464
-
1465
  try:
1466
  api = HfApi()
1467
  token = get_hf_token()
@@ -1495,6 +1473,8 @@ def load_review_metadata():
1495
  print(f"πŸ“₯ Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files across all agents)...")
1496
 
1497
  all_metadata = []
 
 
1498
  for filename in time_frame_files:
1499
  try:
1500
  # Extract agent_identifier from path (first part)
@@ -1505,6 +1485,7 @@ def load_review_metadata():
1505
  continue
1506
 
1507
  agent_identifier = parts[0]
 
1508
 
1509
  file_path = hf_hub_download(
1510
  repo_id=REVIEW_METADATA_REPO,
@@ -1536,6 +1517,14 @@ def load_review_metadata():
1536
  print(f" Warning: Could not load {filename}: {str(e)}")
1537
 
1538
  print(f"βœ“ Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")
 
 
 
 
 
 
 
 
1539
  return all_metadata
1540
 
1541
  except Exception as e:
@@ -1601,13 +1590,12 @@ def get_latest_review_date_for_agent(agent_identifier):
1601
  return None
1602
 
1603
 
1604
- def get_daily_files_last_n_months(agent_identifier, n_months=6):
1605
  """
1606
- Get list of daily file paths for an agent from the last N months.
1607
 
1608
  Args:
1609
  agent_identifier: GitHub identifier of the agent
1610
- n_months: Number of months to look back (default: 6)
1611
 
1612
  Returns:
1613
  List of file paths in format: [agent_identifier]/YYYY.MM.DD.jsonl
@@ -1616,9 +1604,9 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
1616
  api = HfApi()
1617
  token = get_hf_token()
1618
 
1619
- # Calculate date range
1620
  today = datetime.now(timezone.utc)
1621
- n_months_ago = today - timedelta(days=30 * n_months)
1622
 
1623
  # List all files in the repository
1624
  files = api.list_repo_files(repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
@@ -1644,8 +1632,8 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
1644
  file_year, file_month, file_day = map(int, date_components)
1645
  file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
1646
 
1647
- # Include if within last n_months
1648
- if n_months_ago <= file_date <= today:
1649
  recent_files.append(filename)
1650
  except Exception:
1651
  continue
@@ -1704,7 +1692,7 @@ def fetch_review_current_status(review_url, token):
1704
 
1705
  def refresh_review_status_for_agent(agent_identifier, token):
1706
  """
1707
- Refresh status for all open reviews from the last 6 months for an agent.
1708
  Only updates reviews that are still open (state="open" or no state_reason).
1709
 
1710
  This implements the smart update strategy:
@@ -1719,11 +1707,11 @@ def refresh_review_status_for_agent(agent_identifier, token):
1719
  Returns:
1720
  Tuple: (total_checked, updated_count)
1721
  """
1722
- print(f"\nπŸ”„ Refreshing open reviews for {agent_identifier} (last 6 months)...")
1723
 
1724
  try:
1725
- # Get daily files from last 6 months
1726
- recent_files = get_daily_files_last_n_months(agent_identifier, n_months=6)
1727
 
1728
  if not recent_files:
1729
  print(f" No recent files found for {agent_identifier}")
@@ -1760,7 +1748,7 @@ def refresh_review_status_for_agent(agent_identifier, token):
1760
  continue
1761
 
1762
  # Review may have been reverted, check status
1763
- review_url = review.get("html_url")
1764
 
1765
  if not review_url:
1766
  updated_reviews.append(review)
@@ -1848,6 +1836,16 @@ def load_agents_from_hf():
1848
 
1849
  with open(file_path, 'r') as f:
1850
  agent_data = json.load(f)
 
 
 
 
 
 
 
 
 
 
1851
  agents.append(agent_data)
1852
 
1853
  except Exception as e:
@@ -1961,21 +1959,21 @@ def save_agent_to_hf(data):
1961
 
1962
  def update_all_agents_incremental():
1963
  """
1964
- Daily scheduled task for incremental review mining and statistics update.
1965
 
1966
  Strategy:
1967
- 1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - 1)
1968
- 2. Fetch yesterday's new reviews
1969
  3. Save all updated/new metadata back to HuggingFace
1970
  4. Reload statistics from updated metadata
1971
  """
1972
  print(f"\n{'='*80}")
1973
- print(f"πŸ•› Daily Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
1974
  print(f"{'='*80}")
1975
 
1976
  try:
1977
  # Fetch and update reviews
1978
- fetch_and_update_daily_reviews()
1979
 
1980
  # Reload statistics from updated metadata
1981
  print(f"\nπŸ“‹ Reloading statistics from updated review metadata...")
@@ -1984,14 +1982,14 @@ def update_all_agents_incremental():
1984
  print(f"\n{'='*80}")
1985
  print(f"πŸ“Š Update Summary:")
1986
  print(f" βœ“ Updated existing review statuses")
1987
- print(f" βœ“ Fetched yesterday's new reviews")
1988
  print(f" βœ“ Statistics reloaded")
1989
  print(f"{'='*80}")
1990
 
1991
- print(f"\nβœ… Daily Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
1992
 
1993
  except Exception as e:
1994
- print(f"βœ— Daily update failed: {str(e)}")
1995
  import traceback
1996
  traceback.print_exc()
1997
 
@@ -2004,24 +2002,39 @@ def construct_leaderboard_from_metadata():
2004
  Returns dictionary of agent stats.
2005
  """
2006
  print("πŸ“Š Constructing leaderboard from review metadata...")
 
2007
  # Load agents
2008
  agents = load_agents_from_hf()
2009
  if not agents:
2010
- print("No agents found")
2011
  return {}
2012
 
 
 
2013
  # Load all review metadata
2014
  all_metadata = load_review_metadata()
2015
 
2016
  cache_dict = {}
2017
 
2018
  for agent in agents:
2019
  identifier = agent.get('github_identifier')
2020
- agent_name = agent.get('agent_name', 'Unknown')
2021
 
2022
  # Filter metadata for this agent
2023
  agent_metadata = [review for review in all_metadata if review.get("agent_identifier") == identifier]
2024
 
 
 
 
 
2025
  # Calculate stats
2026
  stats = calculate_review_stats_from_metadata(agent_metadata)
2027
 
@@ -2032,6 +2045,8 @@ def construct_leaderboard_from_metadata():
2032
  **stats
2033
  }
2034
 
 
 
2035
  return cache_dict
2036
 
2037
 
@@ -2039,15 +2054,18 @@ def construct_leaderboard_from_metadata():
2039
  # UI FUNCTIONS
2040
  # =============================================================================
2041
 
2042
- def create_monthly_metrics_plot():
2043
  """
2044
  Create a Plotly figure with dual y-axes showing:
2045
  - Left y-axis: Acceptance Rate (%) as line curves
2046
  - Right y-axis: Total Reviews created as bar charts
2047
 
2048
  Each agent gets a unique color for both their line and bars.
 
 
 
2049
  """
2050
- metrics = calculate_monthly_metrics_by_agent()
2051
 
2052
  if not metrics['agents'] or not metrics['months']:
2053
  # Return an empty figure with a message
@@ -2104,7 +2122,7 @@ def create_monthly_metrics_plot():
2104
  line=dict(color=color, width=2),
2105
  marker=dict(size=8),
2106
  legendgroup=agent_name,
2107
- showlegend=False, # Hide legend for 70+ agents
2108
  hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
2109
  'Month: %{x}<br>' +
2110
  'Acceptance Rate: %{y:.2f}%<br>' +
@@ -2130,7 +2148,7 @@ def create_monthly_metrics_plot():
2130
  name=agent_name,
2131
  marker=dict(color=color, opacity=0.6),
2132
  legendgroup=agent_name,
2133
- showlegend=False, # Hide legend for 70+ agents
2134
  hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
2135
  'Month: %{x}<br>' +
2136
  'Total Reviews: %{y}<br>' +
@@ -2146,13 +2164,14 @@ def create_monthly_metrics_plot():
2146
  fig.update_yaxes(title_text="<b>Total Reviews</b>", secondary_y=True)
2147
 
2148
  # Update layout
 
2149
  fig.update_layout(
2150
  title=None,
2151
  hovermode='closest', # Show individual agent info on hover
2152
  barmode='group',
2153
  height=600,
2154
- showlegend=False, # Hide legend for 70+ agents
2155
- margin=dict(l=50, r=50, t=50, b=50) # Reduced top margin since no legend
2156
  )
2157
 
2158
  return fig
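
The dual-axis layout described in this function's docstring (acceptance-rate lines on the primary y-axis, review-count bars on the secondary y-axis) follows the standard Plotly secondary-axis pattern. A minimal standalone sketch with toy data, not the app's real traces:

# Minimal dual-axis Plotly sketch: lines on the primary y-axis, bars on the secondary.
import plotly.graph_objects as go
from plotly.subplots import make_subplots

months = ["2025-01", "2025-02", "2025-03"]            # toy data, for illustration only
acceptance_rate = [62.5, 70.0, 66.7]
total_reviews = [40, 55, 48]

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=months, y=acceptance_rate, mode="lines+markers",
                         name="Acceptance Rate"), secondary_y=False)
fig.add_trace(go.Bar(x=months, y=total_reviews, name="Total Reviews", opacity=0.6),
              secondary_y=True)
fig.update_yaxes(title_text="<b>Acceptance Rate (%)</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Total Reviews</b>", secondary_y=True)
fig.update_layout(barmode="group", hovermode="closest", height=600)
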
@@ -2163,28 +2182,44 @@ def get_leaderboard_dataframe():
2163
  Construct leaderboard from review metadata and convert to pandas DataFrame for display.
2164
  Returns formatted DataFrame sorted by retention rate.
2165
  """
 
 
 
 
2166
  # Construct leaderboard from metadata
2167
  cache_dict = construct_leaderboard_from_metadata()
2168
 
 
 
2169
  if not cache_dict:
 
2170
  # Return empty DataFrame with correct columns if no data
2171
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
2172
  return pd.DataFrame(columns=column_names)
2173
 
2174
  rows = []
2175
- for data in cache_dict.values():
 
 
 
 
2176
  # Filter out agents with zero total reviews
2177
- if data.get('total_reviews', 0) == 0:
 
2178
  continue
 
2179
  # Only include display-relevant fields
2180
  rows.append([
2181
  data.get('agent_name', 'Unknown'),
2182
  data.get('website', 'N/A'),
2183
- data.get('total_reviews', 0),
2184
  data.get('merged_prs', 0),
2185
  data.get('acceptance_rate', 0.0),
2186
  ])
2187
 
 
 
 
2188
  # Create DataFrame
2189
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
2190
  df = pd.DataFrame(rows, columns=column_names)
@@ -2199,6 +2234,9 @@ def get_leaderboard_dataframe():
2199
  if "Acceptance Rate (%)" in df.columns and not df.empty:
2200
  df = df.sort_values(by="Acceptance Rate (%)", ascending=False).reset_index(drop=True)
2201
 
 
 
 
2202
  return df
2203
 
2204
 
@@ -2209,13 +2247,13 @@ def submit_agent(identifier, agent_name, organization, description, website):
2209
  """
2210
  # Validate required fields
2211
  if not identifier or not identifier.strip():
2212
- return "❌ GitHub identifier is required", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2213
  if not agent_name or not agent_name.strip():
2214
- return "❌ Agent name is required", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2215
  if not organization or not organization.strip():
2216
- return "❌ Organization name is required", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2217
  if not website or not website.strip():
2218
- return "❌ Website URL is required", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2219
 
2220
  # Clean inputs
2221
  identifier = identifier.strip()
@@ -2227,14 +2265,14 @@ def submit_agent(identifier, agent_name, organization, description, website):
2227
  # Validate GitHub identifier
2228
  is_valid, message = validate_github_username(identifier)
2229
  if not is_valid:
2230
- return f"❌ {message}", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2231
 
2232
  # Check for duplicates by loading agents from HuggingFace
2233
  agents = load_agents_from_hf()
2234
  if agents:
2235
  existing_names = {agent['github_identifier'] for agent in agents}
2236
  if identifier in existing_names:
2237
- return f"⚠️ Agent with identifier '{identifier}' already exists", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2238
 
2239
  # Create submission
2240
  submission = {
@@ -2247,25 +2285,25 @@ def submit_agent(identifier, agent_name, organization, description, website):
2247
 
2248
  # Save to HuggingFace
2249
  if not save_agent_to_hf(submission):
2250
- return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2251
 
2252
  # Return success message - data will be populated by daily incremental updates
2253
- return f"βœ… Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe(), create_monthly_metrics_plot()
2254
 
2255
 
2256
  # =============================================================================
2257
  # BACKGROUND TASKS
2258
  # =============================================================================
2259
 
2260
- def fetch_and_update_daily_reviews():
2261
  """
2262
  Fetch and update reviews with comprehensive status checking using BigQuery.
2263
 
2264
  Strategy:
2265
  1. For each agent:
2266
- - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS - 1 for their closed_at status
2267
- - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS - 1)
2268
- - Fetch new reviews from yesterday 12am to today 12am using BigQuery
2269
  - Save all updated/new metadata back to HuggingFace
2270
  """
2271
  # Initialize BigQuery client
@@ -2284,18 +2322,18 @@ def fetch_and_update_daily_reviews():
2284
  # Calculate date range
2285
  today_utc = datetime.now(timezone.utc)
2286
  today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
2287
- yesterday_midnight = today_midnight - timedelta(days=1)
2288
- cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - 1)
2289
 
2290
  print(f"πŸ“… Time Range Configuration:")
2291
- print(f" Yesterday 12am UTC: {yesterday_midnight.isoformat()}")
2292
  print(f" Today 12am UTC: {today_midnight.isoformat()}")
2293
  print(f" Cutoff for existing reviews: {cutoff_date.isoformat()}")
2294
  print(f" Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
2295
 
2296
  for agent in agents:
2297
  identifier = agent.get('github_identifier')
2298
- agent_name = agent.get('agent_name', 'Unknown')
2299
 
2300
  if not identifier:
2301
  print(f"Warning: Skipping agent without identifier: {agent}")
@@ -2330,46 +2368,46 @@ def fetch_and_update_daily_reviews():
2330
  if recent_metadata:
2331
  print(f"πŸ” Updating PR status for {len(recent_metadata)} existing reviews using BigQuery...")
2332
  # Extract PR URLs from existing metadata
2333
- pr_urls = [r.get('pr_url') for r in recent_metadata if r.get('pr_url')]
2334
- if pr_urls:
2335
  # Fetch status from BigQuery
2336
  extended_end_date = today_utc
2337
- status_map = fetch_pr_status_from_bigquery(client, pr_urls, cutoff_date, extended_end_date)
2338
 
2339
  # Update metadata with new status
2340
  for review in recent_metadata:
2341
- pr_url = review.get('pr_url')
2342
- if pr_url and pr_url in status_map:
2343
- status_info = status_map[pr_url]
2344
  review['pr_status'] = status_info['status']
2345
- review['pr_merged'] = status_info['merged']
2346
- review['pr_closed_at'] = status_info['closed_at']
2347
 
2348
  print(f" βœ“ Updated PR status for existing reviews")
2349
 
2350
- # Step 3: Fetch NEW reviews from yesterday 12am to today 12am using BigQuery
2351
- print(f"πŸ” Fetching new reviews from {yesterday_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
2352
 
2353
- review_rows = fetch_reviews_from_bigquery(client, identifier, yesterday_midnight, today_midnight)
2354
 
2355
  # Extract unique PR URLs and fetch status
2356
- pr_urls = list(set([row.pr_url for row in review_rows if row.pr_url]))
2357
- print(f" Found {len(review_rows)} review events across {len(pr_urls)} unique PRs")
2358
 
2359
  # Fetch PR status for new reviews
2360
  extended_end_date = today_utc
2361
- status_map = fetch_pr_status_from_bigquery(client, pr_urls, yesterday_midnight, extended_end_date)
2362
 
2363
  # Extract metadata for new reviews
2364
- yesterday_metadata = []
2365
  seen_prs = set()
2366
  for row in review_rows:
2367
- pr_url = row.pr_url
2368
- if pr_url in seen_prs:
2369
  continue
2370
- seen_prs.add(pr_url)
2371
 
2372
- status_info = status_map.get(pr_url, {
2373
  'status': 'open',
2374
  'merged': False,
2375
  'closed_at': None
@@ -2377,17 +2415,17 @@ def fetch_and_update_daily_reviews():
2377
 
2378
  metadata = extract_review_metadata_from_bigquery(row, status_info)
2379
  metadata['agent_identifier'] = identifier
2380
- yesterday_metadata.append(metadata)
2381
 
2382
- print(f" βœ“ Found {len(yesterday_metadata)} unique PRs in 24-hour window")
2383
 
2384
  # Step 4: Combine and save all metadata
2385
- all_updated_metadata = recent_metadata + yesterday_metadata
2386
 
2387
  if all_updated_metadata:
2388
  print(f"💾 Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
2389
  save_review_metadata_to_hf(all_updated_metadata, identifier)
2390
- print(f"βœ“ Updated {identifier}: {len(recent_metadata)} existing (status checked) + {len(yesterday_metadata)} new = {len(all_updated_metadata)} total")
2391
  else:
2392
  print(f" No reviews to save for {identifier}")
2393
 
@@ -2402,66 +2440,58 @@ def fetch_and_update_daily_reviews():
2402
  # GRADIO APPLICATION
2403
  # =============================================================================
2404
 
2405
- # Initialize data before creating UI
2406
- if DEBUG_MODE:
2407
- print("\n" + "="*80)
2408
- print("πŸ› DEBUG MODE ENABLED πŸ›")
2409
- print("="*80)
2410
- print("Review retrieval is limited to 10 reviews per query pattern per agent")
2411
-
2412
- # Show how debug mode was enabled
2413
- if args.debug:
2414
- print("Enabled via: command-line flag '--debug'")
2415
- print("To disable: run without '--debug' flag")
2416
- else:
2417
- print("Enabled via: DEBUG_MODE environment variable")
2418
- print("To disable: run with '--no-debug' flag or unset DEBUG_MODE")
2419
-
2420
- print("="*80 + "\n")
2421
- else:
2422
- print("\nπŸš€ Starting in PRODUCTION MODE - full review retrieval enabled")
2423
- if args.no_debug:
2424
- print(" (Explicitly set via '--no-debug' flag)")
2425
- print()
2426
-
2427
- # Start APScheduler for daily updates at 12:00 AM UTC
2428
  scheduler = BackgroundScheduler(timezone="UTC")
2429
  scheduler.add_job(
2430
  update_all_agents_incremental,
2431
- trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
2432
- id='daily_review_mining',
2433
- name='Daily Regular Review Mining',
2434
  replace_existing=True
2435
  )
2436
  scheduler.start()
2437
- print("βœ“ Scheduler started: Daily Incremental Update at 12:00 AM UTC (updates existing metadata + mines yesterday's reviews)")
2438
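
For reference, a weekly APScheduler job of the kind this commit moves to could be registered roughly as below; the exact day and time of the new schedule are not shown in this diff, so the trigger arguments here are assumptions.

# Sketch: weekly scheduled job with APScheduler (day_of_week/hour/minute are assumed values).
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

def update_all_agents_incremental():
    print("running weekly incremental update...")   # placeholder body for the sketch

scheduler = BackgroundScheduler(timezone="UTC")
scheduler.add_job(
    update_all_agents_incremental,
    trigger=CronTrigger(day_of_week="sun", hour=0, minute=0),   # assumed schedule
    id="weekly_review_mining",
    name="Weekly Incremental Review Mining",
    replace_existing=True,
)
scheduler.start()
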
 
2439
  # Create Gradio interface
2440
  with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
2441
 
2442
  gr.Markdown("# 🏆 SWE Agent Review Leaderboard")
2443
- gr.Markdown("Track and compare GitHub PR review acceptance statistics for SWE agents (last 6 months)")
2444
 
2445
  with gr.Tabs():
2446
-
2447
  # Leaderboard Tab
2448
  with gr.Tab("📊 Leaderboard"):
2449
- gr.Markdown("*All statistics are based on reviews from the last 6 months*")
2450
  leaderboard_table = Leaderboard(
2451
- value=get_leaderboard_dataframe(),
2452
  datatype=LEADERBOARD_COLUMNS,
2453
  search_columns=["Agent Name", "Website"],
2454
  filter_columns=["Acceptance Rate (%)"]
2455
  )
2456
 
2457
- gr.Markdown("### Monthly Metrics")
2458
- gr.Markdown("Track acceptance rates and review activity over time")
 
 
 
 
2459
 
2460
- monthly_plot = gr.Plot(
2461
- value=create_monthly_metrics_plot(),
2462
- label="Monthly Review Metrics"
2463
  )
2464
 
 
2465
  # Submit Agent Tab
2466
  with gr.Tab("➕ Submit Agent"):
2467
 
@@ -2507,7 +2537,7 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
2507
  submit_button.click(
2508
  fn=submit_agent,
2509
  inputs=[github_input, name_input, organization_input, description_input, website_input],
2510
- outputs=[submission_status, leaderboard_table, monthly_plot]
2511
  )
2512
 
2513
 
 
25
 
26
  # Parse command-line arguments
27
  parser = argparse.ArgumentParser(description='SWE Agent Review Leaderboard')
 
 
 
 
28
  args = parser.parse_args()
29
 
30
  # =============================================================================
31
  # CONFIGURATION
32
  # =============================================================================
33
 
34
  AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
35
  REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
36
+ LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
37
 
38
  LEADERBOARD_COLUMNS = [
39
  ("Agent Name", "string"),
 
174
  SELECT
175
  repo.name as repo_name,
176
  actor.login as actor_login,
177
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.url') as url,
178
  CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number,
179
  JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at') as reviewed_at,
180
  created_at
 
205
  return []
206
 
207
 
208
+ def fetch_pr_status_from_bigquery(client, urls, start_date, end_date):
209
  """
210
  Fetch PR status (merged/closed) from GitHub Archive PullRequestEvent.
211
 
 
214
 
215
  Args:
216
  client: BigQuery client instance
217
+ urls: List of PR URLs to check status for
218
  start_date: Start datetime (should cover review period and after)
219
  end_date: End datetime (should be recent/current)
220
 
221
  Returns:
222
  Dictionary mapping PR URL to status dict:
223
  {
224
+ 'url': {
225
  'status': 'merged'|'closed'|'open',
226
  'merged': bool,
227
  'closed_at': timestamp or None
228
  }
229
  }
230
  """
231
+ if not urls:
232
  return {}
233
 
234
+ print(f"\n🔍 Querying BigQuery for PR status ({len(urls)} PRs)...")
235
 
236
  # Extract repo and PR number from URLs
237
  # URL format: https://github.com/owner/repo/pull/123
238
  pr_info = []
239
+ for url in urls:
240
  try:
241
  parts = url.replace('https://github.com/', '').split('/')
242
  if len(parts) >= 4:
 
288
  SELECT
289
  repo.name as repo_name,
290
  CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number,
291
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.url') as url,
292
  JSON_EXTRACT_SCALAR(payload, '$.action') as action,
293
  CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) as merged,
294
  JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
 
314
  # Build status map by PR URL
315
  status_map = {}
316
  for row in results:
317
+ url = row.url
318
 
319
  merged = row.merged if row.merged is not None else False
320
  closed_at = row.closed_at or row.merged_at
 
325
 
326
  status = 'merged' if merged else 'closed'
327
 
328
+ status_map[url] = {
329
  'status': status,
330
  'merged': merged,
331
  'closed_at': closed_at
332
  }
333
 
334
  # Mark remaining PRs as open
335
+ for url in urls:
336
  if url not in status_map:
337
  status_map[url] = {
338
  'status': 'open',
 
351
  except Exception as e:
352
  print(f" ✗ BigQuery error: {str(e)}")
353
  # Return all as open on error
354
+ return {url: {'status': 'open', 'merged': False, 'closed_at': None} for url in urls}
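
As a rough illustration of the pattern used by this function (not the exact query in the file), a status lookup against the GitHub Archive dataset in BigQuery can be folded into a URL-keyed dict like this; the table wildcard, date suffix, and WHERE clause are assumptions.

# Sketch: build a {url: status} map from GitHub Archive PullRequestEvent rows,
# defaulting any PR without a matching 'closed' event to 'open'.
from google.cloud import bigquery

def build_status_map(urls):
    client = bigquery.Client()
    sql = """
        SELECT
          JSON_EXTRACT_SCALAR(payload, '$.pull_request.url') AS url,
          CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) AS merged,
          JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') AS closed_at
        FROM `githubarchive.day.2024*`            -- assumed table range
        WHERE type = 'PullRequestEvent'
          AND JSON_EXTRACT_SCALAR(payload, '$.action') = 'closed'
    """
    wanted = set(urls)
    status_map = {}
    for row in client.query(sql).result():
        if row.url in wanted:
            merged = bool(row.merged)
            status_map[row.url] = {'status': 'merged' if merged else 'closed',
                                   'merged': merged, 'closed_at': row.closed_at}
    for url in urls:
        status_map.setdefault(url, {'status': 'open', 'merged': False, 'closed_at': None})
    return status_map
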
355
 
356
 
357
  def extract_review_metadata_from_bigquery(review_row, status_info):
 
365
  Returns:
366
  Dictionary with review metadata
367
  """
368
+ url = review_row.url
369
  pr_number = review_row.pr_number
370
  reviewed_at = review_row.reviewed_at or review_row.created_at
371
 
 
374
  reviewed_at = reviewed_at.isoformat()
375
 
376
  return {
377
+ 'url': url,
378
  'reviewed_at': reviewed_at,
379
  'pr_status': status_info['status'],
380
+ 'merged_at': status_info['merged'],
381
+ 'closed_at': status_info['closed_at'],
382
+ 'url': url,
383
  'review_id': f"pr_{pr_number}"
384
  }
385
 
 
686
  return False, f"Validation error: {str(e)}"
687
 
688
 
689
+ def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_pool, prs_by_url, depth=0):
690
  """
691
  Fetch reviews within a specific time range using time-based partitioning.
692
  Recursively splits the time range if hitting the 1000-result limit.
693
  Supports splitting by day, hour, minute, and second as needed.
694
 
695
  Args:
 
696
  depth: Current recursion depth (for tracking)
697
 
698
  Returns the number of reviews found in this time partition.
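
The recursive splitting that this docstring describes can be pictured with a small standalone sketch: if a window would return more results than the API cap, halve the window and recurse. The count_results callable is a hypothetical stand-in for the GitHub search call.

# Standalone sketch of time-based partitioning around a 1000-result cap.
from datetime import timedelta

RESULT_CAP = 1000

def fetch_partitioned(start, end, count_results, collect):
    n = count_results(start, end)
    if n < RESULT_CAP or (end - start) <= timedelta(seconds=1):
        collect(start, end)                 # window small enough: fetch it directly
        return n
    mid = start + (end - start) / 2         # otherwise split the window and recurse
    return (fetch_partitioned(start, mid, count_results, collect)
            + fetch_partitioned(mid + timedelta(seconds=1), end, count_results, collect))
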
 
730
  total_in_partition = 0
731
 
732
  while True:
 
 
 
 
733
  url = 'https://api.github.com/search/issues' # Use issues endpoint for PR search
734
  params = {
735
  'q': query,
 
760
 
761
  # Add PR reviews to global dict (keyed by PR URL)
762
  for pr in items:
763
+ url = pr.get('url')
764
  pr_number = pr.get('number')
765
  # Use PR URL as unique key (more reliable than number alone)
766
+ if url and url not in prs_by_url:
767
+ prs_by_url[url] = pr
768
  total_in_partition += 1
769
 
770
  # Check if we hit the 1000-result limit
 
791
  split_start = split_start + timedelta(seconds=1)
792
 
793
  count = fetch_reviews_with_time_partition(
794
+ base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
795
  )
796
  total_from_splits += count
797
 
 
812
  split_start = split_start + timedelta(minutes=1)
813
 
814
  count = fetch_reviews_with_time_partition(
815
+ base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
816
  )
817
  total_from_splits += count
818
 
 
833
  split_start = split_start + timedelta(hours=1)
834
 
835
  count = fetch_reviews_with_time_partition(
836
+ base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
837
  )
838
  total_from_splits += count
839
 
 
864
  split_start = split_start + timedelta(days=1)
865
 
866
  count = fetch_reviews_with_time_partition(
867
+ base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
868
  )
869
  total_from_splits += count
870
 
 
875
 
876
  # Recursively fetch both halves
877
  count1 = fetch_reviews_with_time_partition(
878
+ base_query, start_date, mid_date, token_pool, prs_by_url, depth + 1
879
  )
880
  count2 = fetch_reviews_with_time_partition(
881
+ base_query, mid_date + timedelta(days=1), end_date, token_pool, prs_by_url, depth + 1
882
  )
883
 
884
  return count1 + count2
 
900
  return total_in_partition
901
 
902
 
903
+ def fetch_reviews_parallel(query_patterns, start_date, end_date, token_pool, prs_by_url):
904
  """
905
  Fetch reviews for multiple query patterns in parallel using available parallel tokens.
906
 
 
914
  end_date: End datetime for time range
915
  token_pool: TokenPool instance for token management
916
  prs_by_url: Dictionary to collect PRs by URL (shared across patterns)
 
917
 
918
  Returns:
919
  Total number of PRs found across all patterns
 
931
  for pattern in query_patterns:
932
  pattern_prs = {}
933
  count = fetch_reviews_with_time_partition(
934
+ pattern, start_date, end_date, token_pool, pattern_prs, depth=0
935
  )
936
  # Merge pattern results into global dict
937
  with threading.Lock():
 
952
  pattern_prs = {}
953
  try:
954
  count = fetch_reviews_with_time_partition(
955
+ pattern, start_date, end_date, token_pool, pattern_prs, depth=0
956
  )
957
  return pattern, pattern_prs, count
958
  except Exception as e:
 
994
  def extract_review_metadata(pr):
995
  """
996
  Extract minimal PR review metadata for efficient storage.
997
+ Only keeps essential fields: url, reviewed_at, pr_status, merged_at, closed_at.
998
  Note: agent_name is not stored as it's inferred from the folder structure.
999
 
1000
  PR status:
1001
  - pr_status: 'open', 'merged', or 'closed'
1002
+ - merged_at: True if PR was merged, False otherwise
1003
+ - closed_at: Date when PR was closed/merged (if applicable)
1004
 
1005
  Merged PR = PR that was merged after agent review
1006
  Rejected PR = PR that was closed without merging after agent review
1007
  """
1008
  # Extract PR metadata from search results
1009
  # The GitHub search API returns PR data from /search/issues endpoint
1010
+ url = pr.get('url')
1011
  pr_number = pr.get('number')
1012
  created_at = pr.get('created_at')
1013
  closed_at = pr.get('closed_at')
 
1018
 
1019
  # For initial extraction, we don't know if merged yet
1020
  # This will be updated by update_pr_status function
1021
+ merged_at = pull_request_data.get('merged_at') is not None if pull_request_data else False
1022
 
1023
  # Determine initial status
1024
+ if merged_at:
1025
  status = 'merged'
1026
  elif state == 'closed':
1027
  status = 'closed'
 
1029
  status = 'open'
1030
 
1031
  return {
1032
+ 'url': url,
1033
  'reviewed_at': created_at, # When the PR was created (agent reviewed it)
1034
  'pr_status': status,
1035
+ 'merged_at': merged_at,
1036
+ 'closed_at': closed_at,
 
1037
  'review_id': f"pr_{pr_number}" # Use PR number for deduplication
1038
  }
1039
 
 
1045
  For each PR associated with a review, fetch current status from GitHub API.
1046
  Updates metadata_list in-place with PR status information.
1047
 
 
 
1048
  Args:
1049
  metadata_list: List of review metadata dictionaries
1050
  token_pool: TokenPool instance for rotating tokens
 
1055
  if not metadata_list:
1056
  return metadata_list
1057
 
 
 
 
 
 
1058
  # Track unique PRs to avoid duplicate API calls
1059
+ url_to_status = {}
1060
  updated_count = 0
1061
 
1062
  for metadata in metadata_list:
1063
+ url = metadata.get('url')
1064
+ if not url:
1065
  continue
1066
 
1067
  # Skip if already fetched for this PR
1068
+ if url in url_to_status:
1069
+ status_info = url_to_status[url]
1070
  metadata['pr_status'] = status_info['status']
1071
+ metadata['merged_at'] = status_info['merged']
1072
+ metadata['closed_at'] = status_info['closed_at']
1073
  continue
1074
 
1075
  try:
1076
  # Convert HTML URL to API URL
1077
  # https://github.com/owner/repo/pull/123 -> https://api.github.com/repos/owner/repo/pulls/123
1078
+ parts = url.replace('https://github.com/', '').split('/')
1079
  if len(parts) >= 4:
1080
  owner, repo, pull_word, pr_number = parts[0], parts[1], parts[2], parts[3]
1081
  api_url = f'https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}'
 
1106
  }
1107
 
1108
  # Cache and update
1109
+ url_to_status[url] = status_info
1110
  metadata['pr_status'] = status
1111
+ metadata['merged_at'] = merged
1112
+ metadata['closed_at'] = closed_at or merged_at
1113
  updated_count += 1
1114
 
1115
  # Small delay to avoid rate limiting
1116
  time.sleep(0.1)
1117
 
1118
  except Exception as e:
1119
+ print(f" Warning: Could not check PR status for {url}: {e}")
1120
  continue
1121
 
1122
  if updated_count > 0:
 
1127
 
1128
 
1129
 
1130
+ def get_pr_status_from_metadata(review_meta):
1131
+ """
1132
+ Derive PR status from merged_at and closed_at fields.
1133
+
1134
+ Args:
1135
+ review_meta: Dictionary containing merged_at and closed_at fields
1136
+
1137
+ Returns:
1138
+ str: 'merged', 'closed', or 'open'
1139
+ """
1140
+ merged_at = review_meta.get('merged_at')
1141
+ closed_at = review_meta.get('closed_at')
1142
+
1143
+ # If merged_at is set (not None and not False), PR is merged
1144
+ if merged_at:
1145
+ return 'merged'
1146
+ # If closed_at is set but not merged, PR is closed without merging
1147
+ elif closed_at:
1148
+ return 'closed'
1149
+ # Otherwise, PR is still open
1150
+ else:
1151
+ return 'open'
1152
+
1153
+
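
A quick sanity check of the precedence rule implemented above (hypothetical inputs; assumes the function defined just above is in scope):

print(get_pr_status_from_metadata({'merged_at': '2025-01-10T12:00:00Z',
                                   'closed_at': '2025-01-10T12:00:00Z'}))   # 'merged'
print(get_pr_status_from_metadata({'merged_at': None,
                                   'closed_at': '2025-01-11T09:30:00Z'}))   # 'closed'
print(get_pr_status_from_metadata({'merged_at': None, 'closed_at': None}))  # 'open'
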
1154
  def calculate_review_stats_from_metadata(metadata_list):
1155
  """
1156
  Calculate statistics from a list of review metadata (lightweight objects).
1157
+ Works with minimal metadata: url, reviewed_at, merged_at, closed_at.
1158
 
1159
  Returns a dictionary with comprehensive review metrics.
1160
 
1161
  Acceptance Rate is calculated as:
1162
  merged PRs / (merged PRs + rejected PRs) * 100
1163
 
1164
+ Merged PRs = PRs that were merged (merged_at is not None)
1165
+ Rejected PRs = PRs that were closed without merging (closed_at is not None but merged_at is None)
1166
+ Pending PRs = PRs still open (both merged_at and closed_at are None) - excluded from acceptance rate
1167
  """
1168
  total_reviews = len(metadata_list)
1169
 
1170
+ # Count merged PRs (merged_at is set)
1171
  merged_prs = sum(1 for review_meta in metadata_list
1172
+ if get_pr_status_from_metadata(review_meta) == 'merged')
1173
 
1174
  # Count rejected PRs (closed without merging)
1175
  rejected_prs = sum(1 for review_meta in metadata_list
1176
+ if get_pr_status_from_metadata(review_meta) == 'closed')
1177
 
1178
  # Count pending PRs (still open)
1179
  pending_prs = sum(1 for review_meta in metadata_list
1180
+ if get_pr_status_from_metadata(review_meta) == 'open')
1181
 
1182
  # Calculate acceptance rate (exclude pending PRs)
1183
  completed_prs = merged_prs + rejected_prs
 
1191
  }
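
A worked example of the acceptance-rate formula documented above, with made-up counts:

# merged / (merged + rejected) * 100, pending PRs excluded from the denominator.
merged_prs, rejected_prs, pending_prs = 30, 10, 5
completed_prs = merged_prs + rejected_prs
acceptance_rate = merged_prs / completed_prs * 100 if completed_prs > 0 else 0.0
print(acceptance_rate)   # 75.0
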
1192
 
1193
 
1194
+ def calculate_monthly_metrics_by_agent(top_n=None):
1195
  """
1196
+ Calculate monthly metrics for all agents (or top N agents) for visualization.
1197
  Loads data directly from SWE-Arena/review_metadata dataset.
1198
 
1199
+ Args:
1200
+ top_n: If specified, only return metrics for the top N agents by total reviews.
1201
+ Agents are ranked by their total review count across all months.
1202
+
1203
  Returns:
1204
  dict: {
1205
  'agents': list of agent names,
 
1217
  agents = load_agents_from_hf()
1218
 
1219
  # Create mapping from agent_identifier to agent_name
1220
+ identifier_to_name = {agent.get('github_identifier'): agent.get('name') for agent in agents if agent.get('github_identifier')}
1221
 
1222
  # Load all review metadata from review_metadata dataset
1223
  all_metadata = load_review_metadata()
 
1287
  'merged_prs': merged_prs_list,
1288
  }
1289
 
1290
+ # Filter to top N agents if specified
1291
+ agents_list = sorted(list(agent_month_data.keys()))
1292
+ if top_n is not None and top_n > 0:
1293
+ # Calculate total reviews for each agent across all months
1294
+ agent_totals = []
1295
+ for agent_name in agents_list:
1296
+ total_reviews = sum(result_data[agent_name]['total_reviews'])
1297
+ agent_totals.append((agent_name, total_reviews))
1298
+
1299
+ # Sort by total reviews (descending) and take top N
1300
+ agent_totals.sort(key=lambda x: x[1], reverse=True)
1301
+ top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
1302
+
1303
+ # Filter result_data to only include top agents
1304
+ result_data = {agent: result_data[agent] for agent in top_agents if agent in result_data}
1305
+ agents_list = top_agents
1306
+
1307
  return {
1308
+ 'agents': agents_list,
1309
  'months': months,
1310
  'data': result_data
1311
  }
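
Callers of the new top_n parameter would presumably invoke it along these lines; the value 10 is illustrative, and the actual argument used by the plotting code is not shown in this hunk.

metrics = calculate_monthly_metrics_by_agent(top_n=10)   # assumes the function above is in scope
print(metrics['agents'])        # at most 10 agent names, busiest first
print(len(metrics['months']))   # number of months covered by the data
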
 
1341
  """
1342
  Save review metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
1343
  Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's reviews.
 
1344
 
1345
  This function APPENDS new metadata and DEDUPLICATES by review_id.
1346
  Uses batch upload to avoid rate limit (uploads entire folder in single commit).
 
1352
  import tempfile
1353
  import shutil
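
The append-and-deduplicate behaviour described in this docstring boils down to a dict keyed by review_id; a minimal sketch of that merge step (helper name is made up):

# Sketch: later rows with the same review_id replace earlier ones.
def merge_reviews(existing_rows, new_rows):
    merged = {r['review_id']: r for r in existing_rows if r.get('review_id')}
    merged.update({r['review_id']: r for r in new_rows if r.get('review_id')})
    return list(merged.values())
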
1354
 
1355
  try:
1356
  token = get_hf_token()
1357
  if not token:
 
1430
  """
1431
  Load review metadata from the last LEADERBOARD_TIME_FRAME_DAYS.
1432
 
 
 
1433
  Structure: [agent_identifier]/YYYY.MM.DD.jsonl
1434
 
1435
  Returns:
 
1440
  current_time = datetime.now(timezone.utc)
1441
  cutoff_date = current_time - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
1442
 
1443
  try:
1444
  api = HfApi()
1445
  token = get_hf_token()
 
1473
  print(f"📥 Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files across all agents)...")
1474
 
1475
  all_metadata = []
1476
+ agent_identifiers_found = set()
1477
+
1478
  for filename in time_frame_files:
1479
  try:
1480
  # Extract agent_identifier from path (first part)
 
1485
  continue
1486
 
1487
  agent_identifier = parts[0]
1488
+ agent_identifiers_found.add(agent_identifier)
1489
 
1490
  file_path = hf_hub_download(
1491
  repo_id=REVIEW_METADATA_REPO,
 
1517
  print(f" Warning: Could not load {filename}: {str(e)}")
1518
 
1519
  print(f"✓ Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")
1520
+
1521
+ # DEBUG: Show unique agent identifiers found in review folders
1522
+ if agent_identifiers_found:
1523
+ print(f"📋 Agent identifiers found in review metadata folders:")
1524
+ for identifier in sorted(agent_identifiers_found):
1525
+ count = sum(1 for r in all_metadata if r.get('agent_identifier') == identifier)
1526
+ print(f" - '{identifier}': {count} reviews")
1527
+
1528
  return all_metadata
1529
 
1530
  except Exception as e:
 
1590
  return None
1591
 
1592
 
1593
+ def get_daily_files_last_time_frame(agent_identifier):
1594
  """
1595
+ Get list of daily file paths for an agent from the configured time frame.
1596
 
1597
  Args:
1598
  agent_identifier: GitHub identifier of the agent
 
1599
 
1600
  Returns:
1601
  List of file paths in format: [agent_identifier]/YYYY.MM.DD.jsonl
 
1604
  api = HfApi()
1605
  token = get_hf_token()
1606
 
1607
+ # Calculate date range using configured time frame
1608
  today = datetime.now(timezone.utc)
1609
+ cutoff_date = today - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
1610
 
1611
  # List all files in the repository
1612
  files = api.list_repo_files(repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
 
1632
  file_year, file_month, file_day = map(int, date_components)
1633
  file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
1634
 
1635
+ # Include if within configured time frame
1636
+ if cutoff_date <= file_date <= today:
1637
  recent_files.append(filename)
1638
  except Exception:
1639
  continue
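The loop above parses each `[agent_identifier]/YYYY.MM.DD.jsonl` path and keeps it only when the encoded date falls inside the configured window. A self-contained sketch of that check (helper name is illustrative):

```python
# Sketch of the daily-file date filter above; assumes '<agent>/YYYY.MM.DD.jsonl' paths.
from datetime import datetime, timedelta, timezone

def is_within_time_frame(path: str, time_frame_days: int = 180) -> bool:
    try:
        date_part = path.split('/')[-1].removesuffix('.jsonl')
        year, month, day = (int(p) for p in date_part.split('.'))
        file_date = datetime(year, month, day, tzinfo=timezone.utc)
    except (ValueError, IndexError):
        return False
    today = datetime.now(timezone.utc)
    return today - timedelta(days=time_frame_days) <= file_date <= today
```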
 
1692
 
1693
  def refresh_review_status_for_agent(agent_identifier, token):
1694
  """
1695
+ Refresh status for all open reviews from the configured time frame (LEADERBOARD_TIME_FRAME_DAYS) for an agent.
1696
  Only updates reviews that are still open (state="open" or no state_reason).
1697
 
1698
  This implements the smart update strategy:
 
1707
  Returns:
1708
  Tuple: (total_checked, updated_count)
1709
  """
1710
+ print(f"\nπŸ”„ Refreshing open reviews for {agent_identifier} (configured time frame)...")
1711
 
1712
  try:
1713
+ # Get daily files from configured time frame
1714
+ recent_files = get_daily_files_last_time_frame(agent_identifier)
1715
 
1716
  if not recent_files:
1717
  print(f" No recent files found for {agent_identifier}")
 
1748
  continue
1749
 
1750
  # Review may have been reverted, check status
1751
+ review_url = review.get("url")
1752
 
1753
  if not review_url:
1754
  updated_reviews.append(review)
 
1836
 
1837
  with open(file_path, 'r') as f:
1838
  agent_data = json.load(f)
1839
+
1840
+ # Extract github_identifier from filename (e.g., "claude[bot].json" -> "claude[bot]")
1841
+ filename_identifier = json_file.replace('.json', '')
1842
+
1843
+ # Add or override github_identifier to match filename
1844
+ agent_data['github_identifier'] = filename_identifier
1845
+
1846
+ # DEBUG: Log the identifier being used
1847
+ print(f" βœ“ Loaded agent: '{filename_identifier}' -> {agent_data.get('name', 'Unknown')}")
1848
+
1849
  agents.append(agent_data)
1850
 
1851
  except Exception as e:
 
1959
 
1960
  def update_all_agents_incremental():
1961
  """
1962
+ Weekly scheduled task for incremental review mining and statistics update.
1963
 
1964
  Strategy:
1965
+ 1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - 7 days)
1966
+ 2. Fetch last week's new reviews
1967
  3. Save all updated/new metadata back to HuggingFace
1968
  4. Reload statistics from updated metadata
1969
  """
1970
  print(f"\n{'='*80}")
1971
+ print(f"πŸ•› Weekly Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
1972
  print(f"{'='*80}")
1973
 
1974
  try:
1975
  # Fetch and update reviews
1976
+ fetch_and_update_weekly_reviews()
1977
 
1978
  # Reload statistics from updated metadata
1979
  print(f"\nπŸ“‹ Reloading statistics from updated review metadata...")
 
1982
  print(f"\n{'='*80}")
1983
  print(f"πŸ“Š Update Summary:")
1984
  print(f" βœ“ Updated existing review statuses")
1985
+ print(f" βœ“ Fetched last week's new reviews")
1986
  print(f" βœ“ Statistics reloaded")
1987
  print(f"{'='*80}")
1988
 
1989
+ print(f"\nβœ… Weekly Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
1990
 
1991
  except Exception as e:
1992
+ print(f"βœ— Weekly update failed: {str(e)}")
1993
  import traceback
1994
  traceback.print_exc()
1995
 
 
2002
  Returns dictionary of agent stats.
2003
  """
2004
  print("πŸ“Š Constructing leaderboard from review metadata...")
2005
+
2006
  # Load agents
2007
  agents = load_agents_from_hf()
2008
  if not agents:
2009
+ print("⚠️ No agents found")
2010
  return {}
2011
 
2012
+ print(f"βœ“ Loaded {len(agents)} agents")
2013
+
2014
  # Load all review metadata
2015
  all_metadata = load_review_metadata()
2016
+ print(f"βœ“ Loaded {len(all_metadata)} review metadata entries")
2017
+
2018
+ # Debug: Check what agent_identifiers exist in review metadata
2019
+ if all_metadata:
2020
+ review_identifiers = set(r.get('agent_identifier') for r in all_metadata if r.get('agent_identifier'))
2021
+ print(f" Unique agent_identifiers in reviews: {review_identifiers}")
2022
+ else:
2023
+ print("⚠️ No review metadata loaded!")
2024
 
2025
  cache_dict = {}
2026
 
2027
  for agent in agents:
2028
  identifier = agent.get('github_identifier')
2029
+ agent_name = agent.get('name', 'Unknown')
2030
 
2031
  # Filter metadata for this agent
2032
  agent_metadata = [review for review in all_metadata if review.get("agent_identifier") == identifier]
2033
 
2034
+ # Debug output
2035
+ if len(agent_metadata) > 0:
2036
+ print(f" βœ“ Agent '{identifier}' matched {len(agent_metadata)} reviews")
2037
+
2038
  # Calculate stats
2039
  stats = calculate_review_stats_from_metadata(agent_metadata)
2040
 
 
2045
  **stats
2046
  }
2047
 
2048
+ print(f"βœ“ Constructed cache with {len(cache_dict)} agent entries")
2049
+
2050
  return cache_dict
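For context, `calculate_review_stats_from_metadata` (called above) reduces an agent's metadata list to the leaderboard fields. A hedged sketch of what such a reduction could look like, using the `merged_at` field carried by the metadata records in this commit; the real implementation may differ:

```python
# Hypothetical sketch only; the actual calculate_review_stats_from_metadata may differ.
def sketch_review_stats(agent_metadata: list) -> dict:
    total = len(agent_metadata)
    merged = sum(1 for r in agent_metadata if r.get('merged_at'))
    return {
        'total_reviews': total,
        'merged_prs': merged,
        'acceptance_rate': round(100.0 * merged / total, 2) if total else 0.0,
    }
```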
2051
 
2052
 
 
2054
  # UI FUNCTIONS
2055
  # =============================================================================
2056
 
2057
+ def create_monthly_metrics_plot(top_n=None):
2058
  """
2059
  Create a Plotly figure with dual y-axes showing:
2060
  - Left y-axis: Acceptance Rate (%) as line curves
2061
  - Right y-axis: Total Reviews created as bar charts
2062
 
2063
  Each agent gets a unique color for both their line and bars.
2064
+
2065
+ Args:
2066
+ top_n: If specified, only show metrics for the top N agents by total reviews.
2067
  """
2068
+ metrics = calculate_monthly_metrics_by_agent(top_n=top_n)
2069
 
2070
  if not metrics['agents'] or not metrics['months']:
2071
  # Return an empty figure with a message
 
2122
  line=dict(color=color, width=2),
2123
  marker=dict(size=8),
2124
  legendgroup=agent_name,
2125
+ showlegend=(top_n is not None and top_n <= 10), # Show legend for top N agents
2126
  hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
2127
  'Month: %{x}<br>' +
2128
  'Acceptance Rate: %{y:.2f}%<br>' +
 
2148
  name=agent_name,
2149
  marker=dict(color=color, opacity=0.6),
2150
  legendgroup=agent_name,
2151
+ showlegend=False, # Hide duplicate legend entry (already shown in Scatter)
2152
  hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
2153
  'Month: %{x}<br>' +
2154
  'Total Reviews: %{y}<br>' +
 
2164
  fig.update_yaxes(title_text="<b>Total Reviews</b>", secondary_y=True)
2165
 
2166
  # Update layout
2167
+ show_legend = (top_n is not None and top_n <= 10)
2168
  fig.update_layout(
2169
  title=None,
2170
  hovermode='closest', # Show individual agent info on hover
2171
  barmode='group',
2172
  height=600,
2173
+ showlegend=show_legend,
2174
+ margin=dict(l=50, r=150 if show_legend else 50, t=50, b=50) # More right margin when legend is shown
2175
  )
2176
 
2177
  return fig
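A minimal, self-contained version of the dual-axis layout built above (illustrative data; the real figure is driven by the monthly metrics):

```python
# Minimal dual-axis Plotly sketch: acceptance-rate line on the left axis, review-count bars on the right.
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]])
months = ["2024-01", "2024-02", "2024-03"]
fig.add_trace(go.Scatter(x=months, y=[55.0, 62.5, 58.0], name="Acceptance Rate"), secondary_y=False)
fig.add_trace(go.Bar(x=months, y=[40, 55, 47], name="Total Reviews", opacity=0.6), secondary_y=True)
fig.update_yaxes(title_text="Acceptance Rate (%)", secondary_y=False)
fig.update_yaxes(title_text="Total Reviews", secondary_y=True)
fig.update_layout(barmode="group", height=600)
```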
 
2182
  Construct leaderboard from review metadata and convert to pandas DataFrame for display.
2183
  Returns formatted DataFrame sorted by acceptance rate.
2184
  """
2185
+ print("\n" + "="*60)
2186
+ print("πŸ” DEBUG: get_leaderboard_dataframe() called")
2187
+ print("="*60)
2188
+
2189
  # Construct leaderboard from metadata
2190
  cache_dict = construct_leaderboard_from_metadata()
2191
 
2192
+ print(f"πŸ“Š Cache dict size: {len(cache_dict)}")
2193
+
2194
  if not cache_dict:
2195
+ print("⚠️ WARNING: cache_dict is empty!")
2196
  # Return empty DataFrame with correct columns if no data
2197
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
2198
  return pd.DataFrame(columns=column_names)
2199
 
2200
  rows = []
2201
+ filtered_count = 0
2202
+ for identifier, data in cache_dict.items():
2203
+ total_reviews = data.get('total_reviews', 0)
2204
+ print(f" Agent '{identifier}': {total_reviews} reviews")
2205
+
2206
  # Filter out agents with zero total reviews
2207
+ if total_reviews == 0:
2208
+ filtered_count += 1
2209
  continue
2210
+
2211
  # Only include display-relevant fields
2212
  rows.append([
2213
  data.get('agent_name', 'Unknown'),
2214
  data.get('website', 'N/A'),
2215
+ total_reviews,
2216
  data.get('merged_prs', 0),
2217
  data.get('acceptance_rate', 0.0),
2218
  ])
2219
 
2220
+ print(f"πŸ“‰ Filtered out {filtered_count} agents with 0 reviews")
2221
+ print(f"πŸ“ˆ Leaderboard will show {len(rows)} agents")
2222
+
2223
  # Create DataFrame
2224
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
2225
  df = pd.DataFrame(rows, columns=column_names)
 
2234
  if "Acceptance Rate (%)" in df.columns and not df.empty:
2235
  df = df.sort_values(by="Acceptance Rate (%)", ascending=False).reset_index(drop=True)
2236
 
2237
+ print(f"βœ… Final DataFrame shape: {df.shape}")
2238
+ print("="*60 + "\n")
2239
+
2240
  return df
2241
 
2242
 
 
2247
  """
2248
  # Validate required fields
2249
  if not identifier or not identifier.strip():
2250
+ return "❌ GitHub identifier is required", get_leaderboard_dataframe()
2251
  if not agent_name or not agent_name.strip():
2252
+ return "❌ Agent name is required", get_leaderboard_dataframe()
2253
  if not organization or not organization.strip():
2254
+ return "❌ Organization name is required", get_leaderboard_dataframe()
2255
  if not website or not website.strip():
2256
+ return "❌ Website URL is required", get_leaderboard_dataframe()
2257
 
2258
  # Clean inputs
2259
  identifier = identifier.strip()
 
2265
  # Validate GitHub identifier
2266
  is_valid, message = validate_github_username(identifier)
2267
  if not is_valid:
2268
+ return f"❌ {message}", get_leaderboard_dataframe()
2269
 
2270
  # Check for duplicates by loading agents from HuggingFace
2271
  agents = load_agents_from_hf()
2272
  if agents:
2273
  existing_names = {agent['github_identifier'] for agent in agents}
2274
  if identifier in existing_names:
2275
+ return f"⚠️ Agent with identifier '{identifier}' already exists", get_leaderboard_dataframe()
2276
 
2277
  # Create submission
2278
  submission = {
 
2285
 
2286
  # Save to HuggingFace
2287
  if not save_agent_to_hf(submission):
2288
+ return "❌ Failed to save submission", get_leaderboard_dataframe()
2289
 
2290
  # Return success message - data will be populated by weekly incremental updates
2291
+ return f"βœ… Successfully submitted {agent_name}! Review data will be populated by the next weekly incremental update.", get_leaderboard_dataframe()
2292
 
2293
 
2294
  # =============================================================================
2295
  # BACKGROUND TASKS
2296
  # =============================================================================
2297
 
2298
+ def fetch_and_update_weekly_reviews():
2299
  """
2300
  Fetch and update reviews with comprehensive status checking using BigQuery.
2301
 
2302
  Strategy:
2303
  1. For each agent:
2304
+ - Examine ALL open reviews from the last (LEADERBOARD_TIME_FRAME_DAYS - 7) days for their closed_at status
2305
+ - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS - 7 days)
2306
+ - Fetch new reviews from last week using BigQuery
2307
  - Save all updated/new metadata back to HuggingFace
2308
  """
2309
  # Initialize BigQuery client
 
2322
  # Calculate date range
2323
  today_utc = datetime.now(timezone.utc)
2324
  today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
2325
+ last_week_midnight = today_midnight - timedelta(days=7)
2326
+ cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - 7)
2327
 
2328
  print(f"πŸ“… Time Range Configuration:")
2329
+ print(f" Last week 12am UTC: {last_week_midnight.isoformat()}")
2330
  print(f" Today 12am UTC: {today_midnight.isoformat()}")
2331
  print(f" Cutoff for existing reviews: {cutoff_date.isoformat()}")
2332
  print(f" Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
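A worked example of the three timestamps above, assuming LEADERBOARD_TIME_FRAME_DAYS = 180 and a hypothetical run date:

```python
# Hypothetical run on Monday 2024-06-10 with LEADERBOARD_TIME_FRAME_DAYS = 180.
from datetime import datetime, timedelta, timezone

today_midnight = datetime(2024, 6, 10, tzinfo=timezone.utc)     # today 00:00 UTC
last_week_midnight = today_midnight - timedelta(days=7)         # 2024-06-03: new reviews fetched from here
cutoff_date = today_midnight - timedelta(days=180 - 7)          # 2023-12-20: existing reviews re-checked from here
```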
2333
 
2334
  for agent in agents:
2335
  identifier = agent.get('github_identifier')
2336
+ agent_name = agent.get('name', 'Unknown')
2337
 
2338
  if not identifier:
2339
  print(f"Warning: Skipping agent without identifier: {agent}")
 
2368
  if recent_metadata:
2369
  print(f"πŸ” Updating PR status for {len(recent_metadata)} existing reviews using BigQuery...")
2370
  # Extract PR URLs from existing metadata
2371
+ urls = [r.get('url') for r in recent_metadata if r.get('url')]
2372
+ if urls:
2373
  # Fetch status from BigQuery
2374
  extended_end_date = today_utc
2375
+ status_map = fetch_pr_status_from_bigquery(client, urls, cutoff_date, extended_end_date)
2376
 
2377
  # Update metadata with new status
2378
  for review in recent_metadata:
2379
+ url = review.get('url')
2380
+ if url and url in status_map:
2381
+ status_info = status_map[url]
2382
  review['pr_status'] = status_info['status']
2383
+ review['merged_at'] = status_info['merged']
2384
+ review['closed_at'] = status_info['closed_at']
2385
 
2386
  print(f" βœ“ Updated PR status for existing reviews")
2387
 
2388
+ # Step 3: Fetch NEW reviews from last week to today using BigQuery
2389
+ print(f"πŸ” Fetching new reviews from {last_week_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
2390
 
2391
+ review_rows = fetch_reviews_from_bigquery(client, identifier, last_week_midnight, today_midnight)
2392
 
2393
  # Extract unique PR URLs and fetch status
2394
+ urls = list(set([row.url for row in review_rows if row.url]))
2395
+ print(f" Found {len(review_rows)} review events across {len(urls)} unique PRs")
2396
 
2397
  # Fetch PR status for new reviews
2398
  extended_end_date = today_utc
2399
+ status_map = fetch_pr_status_from_bigquery(client, urls, last_week_midnight, extended_end_date)
2400
 
2401
  # Extract metadata for new reviews
2402
+ weekly_metadata = []
2403
  seen_prs = set()
2404
  for row in review_rows:
2405
+ url = row.url
2406
+ if url in seen_prs:
2407
  continue
2408
+ seen_prs.add(url)
2409
 
2410
+ status_info = status_map.get(url, {
2411
  'status': 'open',
2412
  'merged': False,
2413
  'closed_at': None
 
2415
 
2416
  metadata = extract_review_metadata_from_bigquery(row, status_info)
2417
  metadata['agent_identifier'] = identifier
2418
+ weekly_metadata.append(metadata)
2419
 
2420
+ print(f" βœ“ Found {len(weekly_metadata)} unique PRs in 7-day window")
2421
 
2422
  # Step 4: Combine and save all metadata
2423
+ all_updated_metadata = recent_metadata + weekly_metadata
2424
 
2425
  if all_updated_metadata:
2426
  print(f"πŸ’Ύ Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
2427
  save_review_metadata_to_hf(all_updated_metadata, identifier)
2428
+ print(f"βœ“ Updated {identifier}: {len(recent_metadata)} existing (status checked) + {len(weekly_metadata)} new = {len(all_updated_metadata)} total")
2429
  else:
2430
  print(f" No reviews to save for {identifier}")
2431
 
 
2440
  # GRADIO APPLICATION
2441
  # =============================================================================
2442
 
2443
+ # Start APScheduler for weekly updates at 12:00 AM UTC every Monday
 
2444
  scheduler = BackgroundScheduler(timezone="UTC")
2445
  scheduler.add_job(
2446
  update_all_agents_incremental,
2447
+ trigger=CronTrigger(day_of_week='mon', hour=0, minute=0), # 12:00 AM UTC every Monday
2448
+ id='weekly_review_mining',
2449
+ name='Weekly Regular Review Mining',
2450
  replace_existing=True
2451
  )
2452
  scheduler.start()
2453
+ print("βœ“ Scheduler started: Weekly Incremental Update at 12:00 AM UTC every Monday (updates existing metadata + mines last week's reviews)")
2454
 
2455
  # Create Gradio interface
2456
  with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
2457
 
2458
  gr.Markdown("# πŸ† SWE Agent Review Leaderboard")
2459
+ gr.Markdown("Track and compare GitHub PR review acceptance statistics for SWE agents (last 6 months)")
2460
 
2461
  with gr.Tabs():
2462
+
2463
  # Leaderboard Tab
2464
  with gr.Tab("πŸ“Š Leaderboard"):
2465
+ gr.Markdown("*All statistics are based on reviews from the last 6 months*")
2466
  leaderboard_table = Leaderboard(
2467
+ value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
2468
  datatype=LEADERBOARD_COLUMNS,
2469
  search_columns=["Agent Name", "Website"],
2470
  filter_columns=["Acceptance Rate (%)"]
2471
  )
2472
 
2473
+ # Load leaderboard data when app starts
2474
+ app.load(
2475
+ fn=get_leaderboard_dataframe,
2476
+ inputs=[],
2477
+ outputs=[leaderboard_table]
2478
+ )
2479
 
2480
+ # Monthly Metrics Section
2481
+ gr.Markdown("---") # Divider
2482
+ gr.Markdown("### πŸ“ˆ Monthly Performance - Top 5 Agents")
2483
+ gr.Markdown("*Shows acceptance rate trends and review volumes for the most active agents*")
2484
+
2485
+ monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
2486
+
2487
+ # Load monthly metrics when app starts
2488
+ app.load(
2489
+ fn=lambda: create_monthly_metrics_plot(top_n=5),
2490
+ inputs=[],
2491
+ outputs=[monthly_metrics_plot]
2492
  )
2493
 
2494
+
2495
  # Submit Agent Tab
2496
  with gr.Tab("βž• Submit Agent"):
2497
 
 
2537
  submit_button.click(
2538
  fn=submit_agent,
2539
  inputs=[github_input, name_input, organization_input, description_input, website_input],
2540
+ outputs=[submission_status, leaderboard_table]
2541
  )
2542
 
2543
 
msr.py CHANGED
@@ -89,464 +89,188 @@ def get_bigquery_client():
89
  raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
90
 
91
 
92
- # =============================================================================
93
- # BIGQUERY FUNCTIONS
94
- # =============================================================================
95
-
96
- def fetch_reviews_from_bigquery(client, identifier, start_date, end_date):
97
  """
98
- Fetch PR review events from GitHub Archive for a specific agent.
99
-
100
- Queries githubarchive.day.YYYYMMDD tables for PullRequestReviewEvent where
101
- actor.login matches the agent identifier.
102
-
103
  Args:
104
- client: BigQuery client instance
105
- identifier: GitHub username or bot identifier (e.g., 'amazon-inspector-beta[bot]')
106
- start_date: Start datetime (timezone-aware)
107
- end_date: End datetime (timezone-aware)
108
-
109
  Returns:
110
- List of review event rows with PR information
111
  """
112
- print(f"\nπŸ” Querying BigQuery for reviews by {identifier}")
113
- print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
114
-
115
- # Generate list of table names for each day in the range
116
- table_refs = []
117
  current_date = start_date
 
118
  while current_date < end_date:
119
- table_name = f"githubarchive.day.{current_date.strftime('%Y%m%d')}"
120
- table_refs.append(table_name)
121
  current_date += timedelta(days=1)
 
 
 
 
122
 
123
- # Build UNION ALL query for all daily tables
124
- union_parts = []
125
- for table_name in table_refs:
126
- union_parts.append(f"""
127
- SELECT
128
- repo.name as repo_name,
129
- actor.login as actor_login,
130
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as pr_url,
131
- CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number,
132
- JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at') as reviewed_at,
133
- created_at
134
- FROM `{table_name}`
135
- WHERE type = 'PullRequestReviewEvent'
136
- AND actor.login = @identifier
137
- """)
138
-
139
- query = " UNION ALL ".join(union_parts)
140
-
141
- job_config = bigquery.QueryJobConfig(
142
- query_parameters=[
143
- bigquery.ScalarQueryParameter("identifier", "STRING", identifier)
144
- ]
145
- )
146
-
147
- print(f" Querying {len(table_refs)} daily tables...")
148
-
149
- try:
150
- query_job = client.query(query, job_config=job_config)
151
- results = list(query_job.result())
152
-
153
- print(f" βœ“ Found {len(results)} review events")
154
- return results
155
-
156
- except Exception as e:
157
- print(f" βœ— BigQuery error: {str(e)}")
158
- return []
159
 
 
 
 
160
 
161
- def fetch_pr_status_from_bigquery(client, pr_urls, start_date, end_date):
162
  """
163
- Fetch PR status (merged/closed) from GitHub Archive PullRequestEvent.
164
-
165
- For each PR URL, looks for PullRequestEvent with action='closed' to determine
166
- if the PR was merged or just closed.
167
-
 
168
  Args:
169
  client: BigQuery client instance
170
- pr_urls: List of PR URLs to check status for
171
- start_date: Start datetime (should cover review period and after)
172
- end_date: End datetime (should be recent/current)
173
-
174
  Returns:
175
- Dictionary mapping PR URL to status dict:
176
  {
177
- 'pr_url': {
178
- 'status': 'merged'|'closed'|'open',
179
- 'merged': bool,
180
- 'closed_at': timestamp or None
181
- }
 
 
 
 
 
182
  }
183
  """
184
- if not pr_urls:
185
- return {}
186
-
187
- print(f"\nπŸ” Querying BigQuery for PR status ({len(pr_urls)} PRs)...")
188
-
189
- # Extract repo and PR number from URLs
190
- # URL format: https://github.com/owner/repo/pull/123
191
- pr_info = []
192
- for url in pr_urls:
193
- try:
194
- parts = url.replace('https://github.com/', '').split('/')
195
- if len(parts) >= 4:
196
- owner = parts[0]
197
- repo = parts[1]
198
- pr_number = int(parts[3])
199
- repo_name = f"{owner}/{repo}"
200
- pr_info.append({
201
- 'url': url,
202
- 'repo': repo_name,
203
- 'number': pr_number
204
- })
205
- except Exception as e:
206
- print(f" Warning: Could not parse PR URL {url}: {e}")
207
- continue
208
-
209
- if not pr_info:
210
- return {}
211
-
212
- # Build repo filter condition for WHERE clause
213
- # Group PRs by repo to create efficient filters
214
- repos_to_prs = defaultdict(list)
215
- for pr in pr_info:
216
- repos_to_prs[pr['repo']].append(pr['number'])
217
-
218
- # Generate list of table names for date range
219
- # Look back 1 full year from end_date to catch PR close events that may have occurred before reviews
220
- pr_status_start = end_date - timedelta(days=365)
221
- table_refs = []
222
- current_date = pr_status_start
223
- while current_date < end_date:
224
- table_name = f"githubarchive.day.{current_date.strftime('%Y%m%d')}"
225
- table_refs.append(table_name)
226
- current_date += timedelta(days=1)
227
-
228
- # Build WHERE clause to filter by specific repos and PR numbers
229
- # Format: (repo='owner/repo1' AND pr_number IN (1,2,3)) OR (repo='owner/repo2' AND pr_number IN (4,5))
230
- filter_conditions = []
231
- for repo, pr_numbers in repos_to_prs.items():
232
- pr_list = ','.join(map(str, pr_numbers))
233
- filter_conditions.append(f"(repo.name = '{repo}' AND CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) IN ({pr_list}))")
234
-
235
- pr_filter = " OR ".join(filter_conditions)
236
-
237
- # Build query to find close/merge events for specific PRs
238
- union_parts = []
239
- for table_name in table_refs:
240
- union_parts.append(f"""
241
- SELECT
242
- repo.name as repo_name,
243
- CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number,
244
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as pr_url,
245
- JSON_EXTRACT_SCALAR(payload, '$.action') as action,
246
- CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) as merged,
247
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
248
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged_at') as merged_at,
249
- created_at
250
- FROM `{table_name}`
251
- WHERE type = 'PullRequestEvent'
252
  AND JSON_EXTRACT_SCALAR(payload, '$.action') = 'closed'
253
- AND ({pr_filter})
254
- """)
255
-
256
- query = " UNION ALL ".join(union_parts)
257
-
258
- print(f" Querying {len(table_refs)} daily tables for PR status (1-year lookback: {pr_status_start.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')})...")
259
- print(f" Filtering for {len(pr_info)} specific PRs across {len(repos_to_prs)} repos")
260
-
261
  try:
262
  query_job = client.query(query)
263
  results = list(query_job.result())
264
-
265
- print(f" βœ“ Found {len(results)} PR close events")
266
-
267
- # Build status map by PR URL
268
- status_map = {}
 
269
  for row in results:
270
- pr_url = row.pr_url
271
-
272
- merged = row.merged if row.merged is not None else False
273
- closed_at = row.closed_at or row.merged_at
274
-
275
- # Convert to ISO format if datetime
 
 
 
 
 
 
276
  if hasattr(closed_at, 'isoformat'):
277
  closed_at = closed_at.isoformat()
278
-
279
- status = 'merged' if merged else 'closed'
280
-
281
- status_map[pr_url] = {
282
- 'status': status,
283
- 'merged': merged,
284
- 'closed_at': closed_at
285
- }
286
-
287
- # Mark remaining PRs as open
288
- for url in pr_urls:
289
- if url not in status_map:
290
- status_map[url] = {
291
- 'status': 'open',
292
- 'merged': False,
293
- 'closed_at': None
294
- }
295
-
296
- merged_count = sum(1 for s in status_map.values() if s['merged'])
297
- closed_count = sum(1 for s in status_map.values() if s['status'] == 'closed')
298
- open_count = sum(1 for s in status_map.values() if s['status'] == 'open')
299
-
300
- print(f" Status breakdown: {merged_count} merged, {closed_count} closed, {open_count} open")
301
-
302
- return status_map
303
-
304
- except Exception as e:
305
- print(f" βœ— BigQuery error: {str(e)}")
306
- # Return all as open on error
307
- return {url: {'status': 'open', 'merged': False, 'closed_at': None} for url in pr_urls}
308
-
309
-
310
- def extract_review_metadata(review_row, status_info):
311
- """
312
- Extract minimal PR review metadata from BigQuery row and status info.
313
-
314
- Args:
315
- review_row: BigQuery row from PullRequestReviewEvent query
316
- status_info: Status dictionary from fetch_pr_status_from_bigquery
317
-
318
- Returns:
319
- Dictionary with review metadata
320
- """
321
- pr_url = review_row.pr_url
322
- pr_number = review_row.pr_number
323
- reviewed_at = review_row.reviewed_at or review_row.created_at
324
-
325
- # Convert to ISO format if datetime
326
- if hasattr(reviewed_at, 'isoformat'):
327
- reviewed_at = reviewed_at.isoformat()
328
-
329
- return {
330
- 'html_url': pr_url,
331
- 'reviewed_at': reviewed_at,
332
- 'pr_status': status_info['status'],
333
- 'pr_merged': status_info['merged'],
334
- 'pr_closed_at': status_info['closed_at'],
335
- 'pr_url': pr_url,
336
- 'review_id': f"pr_{pr_number}"
337
- }
338
-
339
-
340
- def fetch_all_reviews_metadata(identifier, agent_name):
341
- """
342
- Fetch PR reviews associated with a GitHub user or bot for the past LEADERBOARD_TIME_FRAME_DAYS.
343
- Uses BigQuery to query GitHub Archive instead of GitHub API.
344
-
345
- Args:
346
- identifier: GitHub username or bot identifier (for BigQuery queries)
347
- agent_name: Human-readable name of the agent (for display only)
348
-
349
- Returns:
350
- List of dictionaries containing minimal PR review metadata with PR status
351
- """
352
- # Initialize BigQuery client
353
- try:
354
- client = get_bigquery_client()
355
- except Exception as e:
356
- print(f"βœ— Failed to initialize BigQuery client: {str(e)}")
357
- return []
358
-
359
- # Define time range: past LEADERBOARD_TIME_FRAME_DAYS (excluding today)
360
- current_time = datetime.now(timezone.utc)
361
- end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0) # 12:00 AM UTC today
362
- start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
363
-
364
- print(f"\n{'='*80}")
365
- print(f"Fetching reviews for: {agent_name} ({identifier})")
366
- print(f"{'='*80}")
367
-
368
- # Fetch review events from BigQuery
369
- review_rows = fetch_reviews_from_bigquery(client, identifier, start_date, end_date)
370
-
371
- if not review_rows:
372
- print(f" No reviews found for {identifier}")
373
- return []
374
-
375
- # Extract unique PR URLs
376
- pr_urls = list(set([row.pr_url for row in review_rows if row.pr_url]))
377
- print(f"\nπŸ“Š Found {len(review_rows)} review events across {len(pr_urls)} unique PRs")
378
-
379
- # Fetch PR status from BigQuery
380
- # Use extended end date to catch recent merges/closes
381
- extended_end_date = current_time
382
- status_map = fetch_pr_status_from_bigquery(client, pr_urls, start_date, extended_end_date)
383
-
384
- # Extract metadata for each review
385
- print(f"\nπŸ“¦ Extracting metadata...")
386
- metadata_list = []
387
-
388
- # Deduplicate by PR URL (multiple reviews on same PR)
389
- seen_prs = set()
390
- for row in review_rows:
391
- pr_url = row.pr_url
392
- if pr_url in seen_prs:
393
- continue
394
- seen_prs.add(pr_url)
395
-
396
- status_info = status_map.get(pr_url, {
397
- 'status': 'open',
398
- 'merged': False,
399
- 'closed_at': None
400
- })
401
-
402
- metadata = extract_review_metadata(row, status_info)
403
- metadata_list.append(metadata)
404
-
405
- print(f" βœ“ Extracted {len(metadata_list)} unique PR review records")
406
-
407
- return metadata_list
408
-
409
-
410
- def fetch_all_reviews_metadata_batch(agents):
411
- """
412
- Fetch PR reviews for ALL agents in a single batch operation.
413
- Uses only 2 BigQuery queries total (instead of 2*N queries for N agents).
414
-
415
- Args:
416
- agents: List of agent dictionaries with 'github_identifier' and 'name' fields
417
-
418
- Returns:
419
- Dictionary mapping agent identifier to list of review metadata:
420
- {
421
- 'agent-identifier': [metadata_list],
422
- ...
423
- }
424
- """
425
- if not agents:
426
- return {}
427
-
428
- # Initialize BigQuery client
429
- try:
430
- client = get_bigquery_client()
431
- except Exception as e:
432
- print(f"βœ— Failed to initialize BigQuery client: {str(e)}")
433
- return {}
434
-
435
- # Define time range: past LEADERBOARD_TIME_FRAME_DAYS (excluding today)
436
- current_time = datetime.now(timezone.utc)
437
- end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
438
- start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
439
-
440
- # Extract all identifiers
441
- identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
442
- if not identifiers:
443
- return {}
444
-
445
- print(f"\nπŸš€ BATCH MODE: Fetching reviews for {len(identifiers)} agents in 2 queries")
446
- print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
447
-
448
- # =========================================================================
449
- # QUERY 1: Fetch ALL review events for ALL agents in one query
450
- # =========================================================================
451
- print(f"\nπŸ” Query 1/2: Fetching ALL review events...")
452
-
453
- # Generate list of table names
454
- table_refs = []
455
- current_date = start_date
456
- while current_date < end_date:
457
- table_name = f"githubarchive.day.{current_date.strftime('%Y%m%d')}"
458
- table_refs.append(table_name)
459
- current_date += timedelta(days=1)
460
-
461
- # Build IN clause for all identifiers
462
- identifier_list = ', '.join([f"'{id}'" for id in identifiers])
463
-
464
- # Build UNION ALL query for all daily tables
465
- union_parts = []
466
- for table_name in table_refs:
467
- union_parts.append(f"""
468
- SELECT
469
- repo.name as repo_name,
470
- actor.login as actor_login,
471
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as pr_url,
472
- CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number,
473
- JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at') as reviewed_at,
474
- created_at
475
- FROM `{table_name}`
476
- WHERE type = 'PullRequestReviewEvent'
477
- AND actor.login IN ({identifier_list})
478
- """)
479
-
480
- query = " UNION ALL ".join(union_parts)
481
-
482
- print(f" Querying {len(table_refs)} daily tables...")
483
-
484
- try:
485
- query_job = client.query(query)
486
- all_review_rows = list(query_job.result())
487
- print(f" βœ“ Found {len(all_review_rows)} total review events")
488
  except Exception as e:
489
  print(f" βœ— BigQuery error: {str(e)}")
 
 
490
  return {}
491
 
492
- # Group reviews by agent
493
- reviews_by_agent = defaultdict(list)
494
- all_pr_urls = set()
495
- for row in all_review_rows:
496
- reviews_by_agent[row.actor_login].append(row)
497
- if row.pr_url:
498
- all_pr_urls.add(row.pr_url)
499
-
500
- print(f" πŸ“Š Reviews found for {len(reviews_by_agent)} agents")
501
- print(f" πŸ“Š {len(all_pr_urls)} unique PRs to check status for")
502
-
503
- # =========================================================================
504
- # QUERY 2: Fetch ALL PR statuses in one query
505
- # =========================================================================
506
- if all_pr_urls:
507
- print(f"\nπŸ” Query 2/2: Fetching ALL PR statuses...")
508
- extended_end_date = current_time
509
- status_map = fetch_pr_status_from_bigquery(client, list(all_pr_urls), start_date, extended_end_date)
510
- else:
511
- status_map = {}
512
-
513
- # =========================================================================
514
- # Post-process: Build metadata for each agent
515
- # =========================================================================
516
- print(f"\nπŸ“¦ Processing metadata for each agent...")
517
- results = {}
518
-
519
- for agent in agents:
520
- identifier = agent.get('github_identifier')
521
- if not identifier or identifier not in reviews_by_agent:
522
- results[identifier] = []
523
- continue
524
-
525
- review_rows = reviews_by_agent[identifier]
526
-
527
- # Deduplicate by PR URL
528
- metadata_list = []
529
- seen_prs = set()
530
- for row in review_rows:
531
- pr_url = row.pr_url
532
- if pr_url in seen_prs:
533
- continue
534
- seen_prs.add(pr_url)
535
-
536
- status_info = status_map.get(pr_url, {
537
- 'status': 'open',
538
- 'merged': False,
539
- 'closed_at': None
540
- })
541
-
542
- metadata = extract_review_metadata(row, status_info)
543
- metadata_list.append(metadata)
544
-
545
- results[identifier] = metadata_list
546
- print(f" βœ“ {agent.get('name', identifier)}: {len(metadata_list)} unique PRs")
547
-
548
- return results
549
-
550
 
551
  # =============================================================================
552
  # HUGGINGFACE STORAGE FUNCTIONS
@@ -554,7 +278,7 @@ def fetch_all_reviews_metadata_batch(agents):
554
 
555
  def group_metadata_by_date(metadata_list):
556
  """
557
- Group review metadata by exact date (year.month.day) for efficient daily storage.
558
  Returns dict: {(year, month, day): [metadata_list]}
559
  """
560
  grouped = defaultdict(list)
@@ -579,7 +303,7 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
579
  Save review metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
580
  Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's reviews.
581
 
582
- This function APPENDS new metadata and DEDUPLICATES by review_id.
583
  Uses batch upload to avoid rate limit (uploads entire folder in single commit).
584
 
585
  Args:
@@ -595,56 +319,41 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
595
 
596
  api = HfApi()
597
 
598
- # Group by exact date (year, month, day)
599
  grouped = group_metadata_by_date(metadata_list)
600
 
 
 
 
 
601
  # Create a temporary directory for batch upload
602
  temp_dir = tempfile.mkdtemp()
603
  agent_folder = os.path.join(temp_dir, agent_identifier)
604
  os.makedirs(agent_folder, exist_ok=True)
605
 
606
  try:
607
- print(f"πŸ“¦ Preparing batch upload for {len(grouped)} daily files...")
608
 
609
  # Process each daily file
610
  for (review_year, month, day), day_metadata in grouped.items():
611
  filename = f"{agent_identifier}/{review_year}.{month:02d}.{day:02d}.jsonl"
612
  local_filename = os.path.join(agent_folder, f"{review_year}.{month:02d}.{day:02d}.jsonl")
613
 
614
- # Download existing file if it exists
615
- existing_metadata = []
616
- try:
617
- file_path = hf_hub_download(
618
- repo_id=REVIEW_METADATA_REPO,
619
- filename=filename,
620
- repo_type="dataset",
621
- token=token
622
- )
623
- existing_metadata = load_jsonl(file_path)
624
- print(f" Found {len(existing_metadata)} existing reviews in {filename}")
625
- except Exception:
626
- print(f" Creating new file: {filename}")
627
-
628
- # Merge and deduplicate by review_id
629
- existing_by_id = {meta['review_id']: meta for meta in existing_metadata if meta.get('review_id')}
630
- new_by_id = {meta['review_id']: meta for meta in day_metadata if meta.get('review_id')}
631
-
632
- # Update with new data (new data overwrites old)
633
- existing_by_id.update(new_by_id)
634
- merged_metadata = list(existing_by_id.values())
635
-
636
- # Save to temp directory
637
- save_jsonl(local_filename, merged_metadata)
638
- print(f" Prepared {len(merged_metadata)} reviews for {filename}")
639
 
640
  # Upload entire folder in a single commit
641
- print(f"πŸ“€ Uploading {len(grouped)} files in single batch commit...")
642
- api.upload_folder(
643
  folder_path=temp_dir,
644
  repo_id=REVIEW_METADATA_REPO,
645
  repo_type="dataset",
646
  token=token,
647
- commit_message=f"Batch update: {agent_identifier} ({len(grouped)} daily files)"
648
  )
649
  print(f" βœ“ Batch upload complete for {agent_identifier}")
650
 
@@ -656,7 +365,7 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
656
  shutil.rmtree(temp_dir)
657
 
658
  except Exception as e:
659
- print(f"βœ— Error saving review metadata: {str(e)}")
660
  import traceback
661
  traceback.print_exc()
662
  return False
@@ -665,21 +374,21 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
665
  def load_agents_from_hf():
666
  """
667
  Load all agent metadata JSON files from HuggingFace dataset.
668
-
669
  The github_identifier is extracted from the filename (e.g., 'agent-name[bot].json' -> 'agent-name[bot]')
670
  """
671
  try:
672
  api = HfApi()
673
  agents = []
674
-
675
  # List all files in the repository
676
  files = api.list_repo_files(repo_id=AGENTS_REPO, repo_type="dataset")
677
-
678
  # Filter for JSON files only
679
  json_files = [f for f in files if f.endswith('.json')]
680
-
681
  print(f"Found {len(json_files)} agent files in {AGENTS_REPO}")
682
-
683
  # Download and parse each JSON file
684
  for json_file in json_files:
685
  try:
@@ -688,23 +397,23 @@ def load_agents_from_hf():
688
  filename=json_file,
689
  repo_type="dataset"
690
  )
691
-
692
  with open(file_path, 'r') as f:
693
  agent_data = json.load(f)
694
-
695
  # Extract github_identifier from filename (remove .json extension)
696
  github_identifier = json_file.replace('.json', '')
697
  agent_data['github_identifier'] = github_identifier
698
-
699
  agents.append(agent_data)
700
-
701
  except Exception as e:
702
  print(f"Warning: Could not load {json_file}: {str(e)}")
703
  continue
704
-
705
  print(f"βœ“ Loaded {len(agents)} agents from HuggingFace")
706
  return agents
707
-
708
  except Exception as e:
709
  print(f"Could not load agents from HuggingFace: {str(e)}")
710
  return []
@@ -717,60 +426,95 @@ def load_agents_from_hf():
717
  def mine_all_agents():
718
  """
719
  Mine review metadata for all agents within LEADERBOARD_TIME_FRAME_DAYS and save to HuggingFace.
720
- Uses BigQuery to query GitHub Archive with batch processing (only 2 queries for all agents).
721
  """
722
  # Load agent metadata from HuggingFace
723
  agents = load_agents_from_hf()
724
  if not agents:
725
  print("No agents found in HuggingFace dataset")
726
  return
727
-
 
 
 
 
 
 
728
  print(f"\n{'='*80}")
729
- print(f"Starting review metadata mining for {len(agents)} agents")
730
  print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
731
- print(f"Data source: BigQuery + GitHub Archive (BATCH MODE)")
732
  print(f"{'='*80}\n")
733
-
734
- # Fetch ALL reviews for ALL agents in batch (only 2 BigQuery queries total!)
735
  try:
736
- all_metadata = fetch_all_reviews_metadata_batch(agents)
737
  except Exception as e:
738
- print(f"βœ— Error during batch fetch: {str(e)}")
 
739
  import traceback
740
  traceback.print_exc()
741
  return
742
-
743
  # Save results for each agent
744
  print(f"\n{'='*80}")
745
- print(f"πŸ’Ύ Saving results to HuggingFace...")
746
  print(f"{'='*80}\n")
747
-
748
- for agent in agents:
 
 
 
 
749
  identifier = agent.get('github_identifier')
750
  agent_name = agent.get('name', agent.get('agent_name', 'Unknown'))
751
-
752
  if not identifier:
753
- print(f"Warning: Skipping agent without identifier: {agent}")
 
754
  continue
755
-
756
  metadata = all_metadata.get(identifier, [])
757
-
 
 
758
  try:
759
  if metadata:
760
- print(f"πŸ’Ύ {agent_name}: Saving {len(metadata)} review records...")
761
- save_review_metadata_to_hf(metadata, identifier)
762
- print(f" βœ“ Successfully saved")
 
 
763
  else:
764
- print(f" No reviews found for {agent_name}")
765
-
 
766
  except Exception as e:
767
- print(f"βœ— Error saving {identifier}: {str(e)}")
768
  import traceback
769
  traceback.print_exc()
 
770
  continue
771
-
772
  print(f"\n{'='*80}")
773
- print(f"βœ… Mining complete for all agents")
 
 
 
 
 
774
  print(f"{'='*80}\n")
775
 
776
 
@@ -779,4 +523,4 @@ def mine_all_agents():
779
  # =============================================================================
780
 
781
  if __name__ == "__main__":
782
- mine_all_agents()
 
89
  raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
90
 
91
 
92
+ def generate_table_union_statements(start_date, end_date):
 
 
 
 
93
  """
94
+ Generate UNION ALL statements for githubarchive.day tables in date range.
95
+
 
 
 
96
  Args:
97
+ start_date: Start datetime
98
+ end_date: End datetime
99
+
 
 
100
  Returns:
101
+ String with UNION ALL SELECT statements for all tables in range
102
  """
103
+ table_names = []
 
 
 
 
104
  current_date = start_date
105
+
106
  while current_date < end_date:
107
+ table_name = f"`githubarchive.day.{current_date.strftime('%Y%m%d')}`"
108
+ table_names.append(table_name)
109
  current_date += timedelta(days=1)
110
+
111
+ # Create UNION ALL chain
112
+ union_parts = [f"SELECT * FROM {table}" for table in table_names]
113
+ return " UNION ALL ".join(union_parts)
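For illustration, the helper above emits one SELECT per day with an exclusive end date; for a two-day range it would look like this:

```python
# Illustrative output of generate_table_union_statements (end date is exclusive).
from datetime import datetime, timezone

start = datetime(2024, 1, 1, tzinfo=timezone.utc)
end = datetime(2024, 1, 3, tzinfo=timezone.utc)
# generate_table_union_statements(start, end) returns:
# "SELECT * FROM `githubarchive.day.20240101` UNION ALL SELECT * FROM `githubarchive.day.20240102`"
```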
114
 
115
 
116
+ # =============================================================================
117
+ # BIGQUERY FUNCTIONS
118
+ # =============================================================================
119
 
120
+ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date):
121
  """
122
+ Fetch PR review metadata for ALL agents using ONE comprehensive BigQuery query.
123
+
124
+ This query combines:
125
+ 1. Review events (PullRequestReviewEvent) for all agents
126
+ 2. PR status (PullRequestEvent with action='closed')
127
+
128
  Args:
129
  client: BigQuery client instance
130
+ identifiers: List of GitHub usernames/bot identifiers
131
+ start_date: Start datetime (timezone-aware)
132
+ end_date: End datetime (timezone-aware)
133
+
134
  Returns:
135
+ Dictionary mapping agent identifier to list of PR metadata:
136
  {
137
+ 'agent-identifier': [
138
+ {
139
+ 'url': PR URL,
140
+ 'reviewed_at': Review timestamp,
141
+ 'merged_at': Merge timestamp (if merged, else None),
142
+ 'closed_at': Close timestamp (if closed, else None)
143
+ },
144
+ ...
145
+ ],
146
+ ...
147
  }
148
  """
149
+ print(f"\nπŸ” Querying BigQuery for ALL {len(identifiers)} agents in ONE QUERY")
150
+ print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
151
+
152
+ # Generate table UNION statements for review period
153
+ review_tables = generate_table_union_statements(start_date, end_date)
154
+
155
+ # Generate table UNION statements for PR status (look back 1 year to catch all closures)
156
+ status_start_date = end_date - timedelta(days=365)
157
+ status_tables = generate_table_union_statements(status_start_date, end_date)
158
+
159
+ # Build identifier list for IN clause
160
+ identifier_list = ', '.join([f"'{id}'" for id in identifiers])
161
+
162
+ # Build comprehensive query with CTEs
163
+ query = f"""
164
+ WITH review_events AS (
165
+ -- Get all review events for ALL agents
166
+ SELECT
167
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
168
+ COALESCE(
169
+ JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at'),
170
+ CAST(created_at AS STRING)
171
+ ) as reviewed_at,
172
+ actor.login as reviewer,
173
+ repo.name as repo_name,
174
+ CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number
175
+ FROM (
176
+ {review_tables}
177
+ )
178
+ WHERE
179
+ type = 'PullRequestReviewEvent'
180
+ AND actor.login IN ({identifier_list})
181
+ ),
182
+
183
+ pr_status AS (
184
+ -- Get merge/close status for those PRs
185
+ SELECT
186
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
187
+ CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) as is_merged,
188
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged_at') as merged_at,
189
+ JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
190
+ created_at
191
+ FROM (
192
+ {status_tables}
193
+ )
194
+ WHERE
195
+ type = 'PullRequestEvent'
196
  AND JSON_EXTRACT_SCALAR(payload, '$.action') = 'closed'
197
+ AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IN (
198
+ SELECT DISTINCT url FROM review_events
199
+ )
200
+ QUALIFY ROW_NUMBER() OVER (PARTITION BY url ORDER BY created_at DESC) = 1
201
+ )
202
+
203
+ -- Join review events with PR status
204
+ SELECT DISTINCT
205
+ re.reviewer,
206
+ re.url,
207
+ re.reviewed_at,
208
+ ps.merged_at,
209
+ ps.closed_at
210
+ FROM review_events re
211
+ LEFT JOIN pr_status ps ON re.url = ps.url
212
+ ORDER BY re.reviewer, re.reviewed_at DESC
213
+ """
214
+
215
+ # Calculate number of days for reporting
216
+ review_days = (end_date - start_date).days
217
+ status_days = (end_date - status_start_date).days
218
+
219
+ print(f" Querying {review_days} days for reviews, {status_days} days for PR status...")
220
+ print(f" Agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}")
221
+
222
  try:
223
  query_job = client.query(query)
224
  results = list(query_job.result())
225
+
226
+ print(f" βœ“ Found {len(results)} total PR review records across all agents")
227
+
228
+ # Group results by agent
229
+ metadata_by_agent = defaultdict(list)
230
+
231
  for row in results:
232
+ reviewer = row.reviewer
233
+
234
+ # Convert datetime objects to ISO strings
235
+ reviewed_at = row.reviewed_at
236
+ if hasattr(reviewed_at, 'isoformat'):
237
+ reviewed_at = reviewed_at.isoformat()
238
+
239
+ merged_at = row.merged_at
240
+ if hasattr(merged_at, 'isoformat'):
241
+ merged_at = merged_at.isoformat()
242
+
243
+ closed_at = row.closed_at
244
  if hasattr(closed_at, 'isoformat'):
245
  closed_at = closed_at.isoformat()
246
+
247
+ metadata_by_agent[reviewer].append({
248
+ 'url': row.url,
249
+ 'reviewed_at': reviewed_at,
250
+ 'merged_at': merged_at,
251
+ 'closed_at': closed_at,
252
+ })
253
+
254
+ # Print breakdown by agent
255
+ print(f"\n πŸ“Š Results breakdown by agent:")
256
+ for identifier in identifiers:
257
+ count = len(metadata_by_agent.get(identifier, []))
258
+ if count > 0:
259
+ metadata = metadata_by_agent[identifier]
260
+ merged_count = sum(1 for m in metadata if m['merged_at'] is not None)
261
+ closed_count = sum(1 for m in metadata if m['closed_at'] is not None and m['merged_at'] is None)
262
+ open_count = count - merged_count - closed_count
263
+ print(f" {identifier}: {count} PRs ({merged_count} merged, {closed_count} closed, {open_count} open)")
264
+
265
+ # Convert defaultdict to regular dict
266
+ return dict(metadata_by_agent)
267
+
268
  except Exception as e:
269
  print(f" βœ— BigQuery error: {str(e)}")
270
+ import traceback
271
+ traceback.print_exc()
272
  return {}
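Downstream code can recover the three-way PR status from the merged_at/closed_at pair returned above. A small sketch of that derivation, mirroring the breakdown logic in this function (record shape as in the per-agent metadata):

```python
# Derive a PR's status from the fields returned by the single-query fetch above.
def derive_pr_status(record: dict) -> str:
    if record.get('merged_at'):
        return 'merged'
    if record.get('closed_at'):
        return 'closed'
    return 'open'

# derive_pr_status({'merged_at': None, 'closed_at': '2024-05-01T12:00:00Z'}) -> 'closed'
```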
273
 
274
 
275
  # =============================================================================
276
  # HUGGINGFACE STORAGE FUNCTIONS
 
278
 
279
  def group_metadata_by_date(metadata_list):
280
  """
281
+ Group review metadata by date (year.month.day) for daily storage.
282
  Returns dict: {(year, month, day): [metadata_list]}
283
  """
284
  grouped = defaultdict(list)
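A hedged sketch of the grouping described above, assuming ISO-8601 `reviewed_at` timestamps on each record (the actual function may handle more edge cases):

```python
# Group metadata records into {(year, month, day): [records]} buckets by reviewed_at.
from collections import defaultdict
from datetime import datetime

def group_by_day(metadata_list: list) -> dict:
    grouped = defaultdict(list)
    for meta in metadata_list:
        ts = meta.get('reviewed_at')
        if not ts:
            continue
        dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
        grouped[(dt.year, dt.month, dt.day)].append(meta)
    return dict(grouped)
```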
 
303
  Save review metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
304
  Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's reviews.
305
 
306
+ This function OVERWRITES existing files completely with fresh data from BigQuery.
307
  Uses batch upload to avoid rate limit (uploads entire folder in single commit).
308
 
309
  Args:
 
319
 
320
  api = HfApi()
321
 
322
+ # Group by date (year, month, day)
323
  grouped = group_metadata_by_date(metadata_list)
324
 
325
+ if not grouped:
326
+ print(f" No valid metadata to save for {agent_identifier}")
327
+ return False
328
+
329
  # Create a temporary directory for batch upload
330
  temp_dir = tempfile.mkdtemp()
331
  agent_folder = os.path.join(temp_dir, agent_identifier)
332
  os.makedirs(agent_folder, exist_ok=True)
333
 
334
  try:
335
+ print(f" πŸ“¦ Preparing batch upload for {len(grouped)} daily files...")
336
 
337
  # Process each daily file
338
  for (review_year, month, day), day_metadata in grouped.items():
339
  filename = f"{agent_identifier}/{review_year}.{month:02d}.{day:02d}.jsonl"
340
  local_filename = os.path.join(agent_folder, f"{review_year}.{month:02d}.{day:02d}.jsonl")
341
 
342
+ # Sort by reviewed_at for better organization
343
+ day_metadata.sort(key=lambda x: x.get('reviewed_at', ''), reverse=True)
344
+
345
+ # Save to temp directory (complete overwrite, no merging)
346
+ save_jsonl(local_filename, day_metadata)
347
+ print(f" Prepared {len(day_metadata)} reviews for {filename}")
348
 
349
  # Upload entire folder in a single commit
350
+ print(f" πŸ“€ Uploading {len(grouped)} files in single batch commit...")
351
+ api.upload_large_folder(
352
  folder_path=temp_dir,
353
  repo_id=REVIEW_METADATA_REPO,
354
  repo_type="dataset",
355
  token=token,
356
+ commit_message=f"Update: {agent_identifier} ({len(grouped)} daily files, {len(metadata_list)} total reviews)"
357
  )
358
  print(f" βœ“ Batch upload complete for {agent_identifier}")
359
 
 
365
  shutil.rmtree(temp_dir)
366
 
367
  except Exception as e:
368
+ print(f" βœ— Error saving review metadata: {str(e)}")
369
  import traceback
370
  traceback.print_exc()
371
  return False
 
374
  def load_agents_from_hf():
375
  """
376
  Load all agent metadata JSON files from HuggingFace dataset.
377
+
378
  The github_identifier is extracted from the filename (e.g., 'agent-name[bot].json' -> 'agent-name[bot]')
379
  """
380
  try:
381
  api = HfApi()
382
  agents = []
383
+
384
  # List all files in the repository
385
  files = api.list_repo_files(repo_id=AGENTS_REPO, repo_type="dataset")
386
+
387
  # Filter for JSON files only
388
  json_files = [f for f in files if f.endswith('.json')]
389
+
390
  print(f"Found {len(json_files)} agent files in {AGENTS_REPO}")
391
+
392
  # Download and parse each JSON file
393
  for json_file in json_files:
394
  try:
 
397
  filename=json_file,
398
  repo_type="dataset"
399
  )
400
+
401
  with open(file_path, 'r') as f:
402
  agent_data = json.load(f)
403
+
404
  # Extract github_identifier from filename (remove .json extension)
405
  github_identifier = json_file.replace('.json', '')
406
  agent_data['github_identifier'] = github_identifier
407
+
408
  agents.append(agent_data)
409
+
410
  except Exception as e:
411
  print(f"Warning: Could not load {json_file}: {str(e)}")
412
  continue
413
+
414
  print(f"βœ“ Loaded {len(agents)} agents from HuggingFace")
415
  return agents
416
+
417
  except Exception as e:
418
  print(f"Could not load agents from HuggingFace: {str(e)}")
419
  return []
 
426
  def mine_all_agents():
427
  """
428
  Mine review metadata for all agents within LEADERBOARD_TIME_FRAME_DAYS and save to HuggingFace.
429
+ Uses ONE BigQuery query for ALL agents (most efficient approach).
430
  """
431
  # Load agent metadata from HuggingFace
432
  agents = load_agents_from_hf()
433
  if not agents:
434
  print("No agents found in HuggingFace dataset")
435
  return
436
+
437
+ # Extract all identifiers
438
+ identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
439
+ if not identifiers:
440
+ print("No valid agent identifiers found")
441
+ return
442
+
443
  print(f"\n{'='*80}")
444
+ print(f"Starting review metadata mining for {len(identifiers)} agents")
445
  print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
446
+ print(f"Data source: BigQuery + GitHub Archive (ONE QUERY FOR ALL AGENTS)")
447
  print(f"{'='*80}\n")
448
+
449
+ # Initialize BigQuery client
450
  try:
451
+ client = get_bigquery_client()
452
  except Exception as e:
453
+ print(f"βœ— Failed to initialize BigQuery client: {str(e)}")
454
+ return
455
+
456
+ # Define time range: past LEADERBOARD_TIME_FRAME_DAYS (excluding today)
457
+ current_time = datetime.now(timezone.utc)
458
+ end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
459
+ start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
460
+
461
+ try:
462
+ all_metadata = fetch_all_pr_metadata_single_query(
463
+ client, identifiers, start_date, end_date
464
+ )
465
+ except Exception as e:
466
+ print(f"βœ— Error during BigQuery fetch: {str(e)}")
467
  import traceback
468
  traceback.print_exc()
469
  return
470
+
471
  # Save results for each agent
472
  print(f"\n{'='*80}")
473
+ print(f"πŸ’Ύ Saving results to HuggingFace for each agent...")
474
  print(f"{'='*80}\n")
475
+
476
+ success_count = 0
477
+ error_count = 0
478
+ no_data_count = 0
479
+
480
+ for i, agent in enumerate(agents, 1):
481
  identifier = agent.get('github_identifier')
482
  agent_name = agent.get('name', agent.get('agent_name', 'Unknown'))
483
+
484
  if not identifier:
485
+ print(f"[{i}/{len(agents)}] Skipping agent without identifier")
486
+ error_count += 1
487
  continue
488
+
489
  metadata = all_metadata.get(identifier, [])
490
+
491
+ print(f"[{i}/{len(agents)}] {agent_name} ({identifier}):")
492
+
493
  try:
494
  if metadata:
495
+ print(f" πŸ’Ύ Saving {len(metadata)} review records...")
496
+ if save_review_metadata_to_hf(metadata, identifier):
497
+ success_count += 1
498
+ else:
499
+ error_count += 1
500
  else:
501
+ print(f" No reviews found")
502
+ no_data_count += 1
503
+
504
  except Exception as e:
505
+ print(f" βœ— Error saving {identifier}: {str(e)}")
506
  import traceback
507
  traceback.print_exc()
508
+ error_count += 1
509
  continue
510
+
511
  print(f"\n{'='*80}")
512
+ print(f"βœ… Mining complete!")
513
+ print(f" Total agents: {len(agents)}")
514
+ print(f" Successfully saved: {success_count}")
515
+ print(f" No data (skipped): {no_data_count}")
516
+ print(f" Errors: {error_count}")
517
+ print(f" BigQuery queries executed: 1")
518
  print(f"{'='*80}\n")
519
 
520
 
 
523
  # =============================================================================
524
 
525
  if __name__ == "__main__":
526
+ mine_all_agents()