zhimin-z commited on
Commit
0d6aceb
·
1 Parent(s): 170e383
Files changed (4) hide show
  1. Dockerfile +0 -34
  2. app.py +125 -1297
  3. msr.py +344 -335
  4. requirements.txt +3 -5
Dockerfile DELETED
@@ -1,34 +0,0 @@
1
- # Use official Python runtime as base image
2
- FROM python:3.12-slim
3
-
4
- # Set working directory
5
- WORKDIR /app
6
-
7
- # Install system dependencies (if needed)
8
- RUN apt-get update && apt-get install -y \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- # Copy requirements.txt
13
- COPY requirements.txt .
14
-
15
- # Install Python dependencies
16
- RUN pip install --no-cache-dir -r requirements.txt
17
-
18
- # Copy application files
19
- COPY .env .
20
- COPY msr.py .
21
-
22
- # Create a non-root user for security (optional but recommended)
23
- RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
24
- USER appuser
25
-
26
- # Expose port for Gradio web interface (default is 7860)
27
- EXPOSE 7860
28
-
29
- # Set environment variables
30
- ENV GRADIO_SERVER_NAME=0.0.0.0
31
- ENV GRADIO_SERVER_PORT=7860
32
-
33
- # Run the Gradio app
34
- CMD ["python", "msr.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -3,13 +3,10 @@ from gradio_leaderboard import Leaderboard, ColumnFilter
3
  import json
4
  import os
5
  import time
6
- import tempfile
7
  import requests
8
- from datetime import datetime, timezone, timedelta
9
- from collections import defaultdict
10
  from huggingface_hub import HfApi, hf_hub_download
11
  from huggingface_hub.errors import HfHubHTTPError
12
- from datasets import load_dataset, Dataset
13
  import backoff
14
  from dotenv import load_dotenv
15
  import pandas as pd
@@ -18,7 +15,6 @@ import plotly.graph_objects as go
18
  from plotly.subplots import make_subplots
19
  from apscheduler.schedulers.background import BackgroundScheduler
20
  from apscheduler.triggers.cron import CronTrigger
21
- from google.cloud import bigquery
22
 
23
  # Load environment variables
24
  load_dotenv()
@@ -28,10 +24,7 @@ load_dotenv()
28
  # =============================================================================
29
 
30
  AGENTS_REPO = "SWE-Arena/bot_metadata" # HuggingFace dataset for agent metadata
31
- REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
32
  LEADERBOARD_REPO = "SWE-Arena/leaderboard_metadata" # HuggingFace dataset for leaderboard data
33
- LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for constructing leaderboard
34
- UPDATE_TIME_FRAME_DAYS = 30 # Time frame for mining new reviews
35
 
36
  LEADERBOARD_COLUMNS = [
37
  ("Agent Name", "string"),
@@ -41,71 +34,6 @@ LEADERBOARD_COLUMNS = [
41
  ("Acceptance Rate (%)", "number"),
42
  ]
43
 
44
- # =============================================================================
45
- # JSONL FILE OPERATIONS
46
- # =============================================================================
47
-
48
- def load_jsonl(filename):
49
- """Load JSONL file and return list of dictionaries."""
50
- if not os.path.exists(filename):
51
- return []
52
-
53
- data = []
54
- with open(filename, 'r', encoding='utf-8') as f:
55
- for line in f:
56
- line = line.strip()
57
- if line:
58
- try:
59
- entry = json.loads(line)
60
- data.append(entry)
61
- except json.JSONDecodeError as e:
62
- print(f"Warning: Skipping invalid JSON line: {e}")
63
- return data
64
-
65
-
66
- def save_jsonl(filename, data):
67
- """Save list of dictionaries to JSONL file."""
68
- with open(filename, 'w', encoding='utf-8') as f:
69
- for item in data:
70
- f.write(json.dumps(item) + '\n')
71
-
72
-
73
- def cache_to_dict(cache_list):
74
- """Convert list of cache entries to dictionary by identifier."""
75
- return {entry['github_identifier']: entry for entry in cache_list}
76
-
77
-
78
- def dict_to_cache(cache_dict):
79
- """Convert dictionary back to list of values."""
80
- return list(cache_dict.values())
81
-
82
-
83
- def normalize_date_format(date_string):
84
- """
85
- Convert date strings to standardized ISO 8601 format with Z suffix.
86
- Handles both old format (2025-10-15T23:23:47.983068) and new format (2025-10-15T23:23:47Z).
87
- """
88
- if not date_string or date_string == 'N/A':
89
- return 'N/A'
90
-
91
- try:
92
- # Replace space with 'T' for ISO format compatibility
93
- date_string = date_string.replace(' ', 'T')
94
-
95
- # Fix incomplete timezone offset (+00 or -00 -> +00:00 or -00:00)
96
- if date_string[-3:-2] in ('+', '-') and ':' not in date_string[-3:]:
97
- date_string = date_string + ':00'
98
-
99
- # Parse the date string (handles both with and without microseconds)
100
- dt = datetime.fromisoformat(date_string.replace('Z', '+00:00'))
101
-
102
- # Convert to standardized format
103
- return dt.strftime('%Y-%m-%dT%H:%M:%SZ')
104
- except Exception as e:
105
- print(f"Warning: Could not parse date '{date_string}': {e}")
106
- return date_string
107
-
108
-
109
  # =============================================================================
110
  # HUGGINGFACE API WRAPPERS WITH BACKOFF
111
  # =============================================================================
@@ -125,7 +53,7 @@ def is_rate_limit_error(e):
125
  max_value=3600,
126
  giveup=lambda e: not is_rate_limit_error(e),
127
  on_backoff=lambda details: print(
128
- f"Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
129
  )
130
  )
131
  def list_repo_files_with_backoff(api, **kwargs):
@@ -141,7 +69,7 @@ def list_repo_files_with_backoff(api, **kwargs):
141
  max_value=3600,
142
  giveup=lambda e: not is_rate_limit_error(e),
143
  on_backoff=lambda details: print(
144
- f"Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
145
  )
146
  )
147
  def hf_hub_download_with_backoff(**kwargs):
@@ -149,337 +77,6 @@ def hf_hub_download_with_backoff(**kwargs):
149
  return hf_hub_download(**kwargs)
150
 
151
 
152
- @backoff.on_exception(
153
- backoff.expo,
154
- HfHubHTTPError,
155
- max_tries=8,
156
- base=300,
157
- max_value=3600,
158
- giveup=lambda e: not is_rate_limit_error(e),
159
- on_backoff=lambda details: print(
160
- f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
161
- )
162
- )
163
- def upload_file_with_backoff(api, **kwargs):
164
- """Wrapper for api.upload_file() with exponential backoff for rate limits."""
165
- return api.upload_file(**kwargs)
166
-
167
-
168
- @backoff.on_exception(
169
- backoff.expo,
170
- HfHubHTTPError,
171
- max_tries=8,
172
- base=300,
173
- max_value=3600,
174
- giveup=lambda e: not is_rate_limit_error(e),
175
- on_backoff=lambda details: print(
176
- f"⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
177
- )
178
- )
179
- def upload_folder_with_backoff(api, **kwargs):
180
- """Wrapper for api.upload_folder() with exponential backoff for rate limits."""
181
- return api.upload_folder(**kwargs)
182
-
183
-
184
- # =============================================================================
185
- # BIGQUERY FUNCTIONS
186
- # =============================================================================
187
-
188
- def get_bigquery_client():
189
- """
190
- Initialize BigQuery client using credentials from environment variable.
191
-
192
- Expects GOOGLE_APPLICATION_CREDENTIALS_JSON environment variable containing
193
- the service account JSON credentials as a string.
194
- """
195
- # Get the JSON content from environment variable
196
- creds_json = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON')
197
-
198
- if creds_json:
199
- # Create a temporary file to store credentials
200
- with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
201
- temp_file.write(creds_json)
202
- temp_path = temp_file.name
203
-
204
- # Set environment variable to point to temp file
205
- os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = temp_path
206
-
207
- # Initialize BigQuery client
208
- client = bigquery.Client()
209
-
210
- # Clean up temp file
211
- os.unlink(temp_path)
212
-
213
- return client
214
- else:
215
- raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
216
-
217
-
218
- def generate_table_union_statements(start_date, end_date):
219
- """
220
- Generate UNION ALL statements for githubarchive.month tables in date range.
221
- Uses monthly tables instead of daily to drastically reduce query size.
222
-
223
- Args:
224
- start_date: Start datetime
225
- end_date: End datetime
226
-
227
- Returns:
228
- String with UNION ALL SELECT statements for all monthly tables in range
229
- """
230
- table_names = []
231
-
232
- # Start from the beginning of start_date's month
233
- current_date = start_date.replace(day=1)
234
-
235
- # End at the beginning of end_date's month (inclusive)
236
- end_month = end_date.replace(day=1)
237
-
238
- while current_date <= end_month:
239
- table_name = f"`githubarchive.month.{current_date.strftime('%Y%m')}`"
240
- table_names.append(table_name)
241
-
242
- # Move to next month
243
- if current_date.month == 12:
244
- current_date = current_date.replace(year=current_date.year + 1, month=1)
245
- else:
246
- current_date = current_date.replace(month=current_date.month + 1)
247
-
248
- # Create UNION ALL chain
249
- union_parts = [f"SELECT * FROM {table}" for table in table_names]
250
- return " UNION ALL ".join(union_parts)
251
-
252
-
253
- def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=100, upload_immediately=True):
254
- """
255
- Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
256
- Splits agents into smaller batches to avoid performance issues with large queries.
257
-
258
- Args:
259
- client: BigQuery client instance
260
- identifiers: List of GitHub usernames/bot identifiers
261
- start_date: Start datetime (timezone-aware)
262
- end_date: End datetime (timezone-aware)
263
- batch_size: Number of agents to process per batch (default: 100)
264
- upload_immediately: If True, upload each batch to HuggingFace immediately after processing (default: True)
265
-
266
- Returns:
267
- Dictionary mapping agent identifier to list of PR metadata
268
- """
269
- print(f"\n🔍 Using BATCHED approach: {len(identifiers)} agents in batches of {batch_size}")
270
-
271
- # Log upload mode
272
- if upload_immediately:
273
- print(f" 📤 Upload mode: IMMEDIATE (upload after each batch)")
274
- else:
275
- print(f" 📤 Upload mode: DEFERRED (upload after all batches complete)")
276
-
277
- # Split identifiers into batches
278
- batches = [identifiers[i:i + batch_size] for i in range(0, len(identifiers), batch_size)]
279
- total_batches = len(batches)
280
-
281
- print(f" Total batches: {total_batches}")
282
-
283
- # Collect results from all batches
284
- all_metadata = {}
285
- successful_batches = 0
286
- failed_batches = 0
287
-
288
- for batch_num, batch_identifiers in enumerate(batches, 1):
289
- print(f"\n📦 Processing batch {batch_num}/{total_batches} ({len(batch_identifiers)} agents)...")
290
-
291
- try:
292
- # Query this batch - process each agent in the batch
293
- batch_results = {}
294
- for identifier in batch_identifiers:
295
- review_rows = fetch_reviews_from_bigquery(client, identifier, start_date, end_date)
296
-
297
- # Extract metadata
298
- metadata_list = []
299
- seen_prs = set()
300
- for row in review_rows:
301
- url = row.url
302
- if url in seen_prs:
303
- continue
304
- seen_prs.add(url)
305
-
306
- metadata = extract_review_metadata_from_bigquery(row)
307
- metadata_list.append(metadata)
308
-
309
- if metadata_list:
310
- all_metadata[identifier] = metadata_list
311
- batch_results[identifier] = metadata_list
312
-
313
- successful_batches += 1
314
- print(f" ✓ Batch {batch_num}/{total_batches} complete: {len(batch_identifiers)} agents processed")
315
-
316
- # Upload immediately after this batch if enabled
317
- if upload_immediately and batch_results:
318
- print(f"\n 📤 Uploading batch {batch_num}/{total_batches} results to HuggingFace...")
319
- upload_success = 0
320
- upload_errors = 0
321
-
322
- for identifier, metadata_list in batch_results.items():
323
- if metadata_list:
324
- if save_review_metadata_to_hf(metadata_list, identifier):
325
- upload_success += 1
326
- else:
327
- upload_errors += 1
328
-
329
- print(f" ✓ Batch {batch_num}/{total_batches} upload complete ({upload_success} agents uploaded, {upload_errors} errors)")
330
-
331
- except Exception as e:
332
- failed_batches += 1
333
- print(f" ✗ Batch {batch_num}/{total_batches} failed: {str(e)}")
334
- print(f" Continuing with remaining batches...")
335
- continue
336
-
337
- print(f"\n📊 Batching Summary:")
338
- print(f" Total batches: {total_batches}")
339
- print(f" Successful: {successful_batches}")
340
- print(f" Failed: {failed_batches}")
341
- print(f" Total agents with data: {len(all_metadata)}")
342
-
343
- return all_metadata
344
-
345
-
346
- def fetch_reviews_from_bigquery(client, identifier, start_date, end_date):
347
- """
348
- Fetch PR review events from GitHub Archive for a SINGLE agent.
349
-
350
- NOTE: This function is designed for querying a single agent at a time.
351
- For querying multiple agents efficiently, use fetch_all_pr_metadata_batched() instead.
352
-
353
- Queries githubarchive.month.YYYYMM tables for PullRequestReviewEvent where
354
- actor.login matches the agent identifier, and joins with PR status.
355
-
356
- Args:
357
- client: BigQuery client instance
358
- identifier: GitHub username or bot identifier (e.g., 'amazon-inspector-beta[bot]')
359
- start_date: Start datetime (timezone-aware)
360
- end_date: End datetime (timezone-aware)
361
-
362
- Returns:
363
- List of review event rows with PR information including merged_at and closed_at
364
- """
365
- print(f"\n🔍 Querying BigQuery for reviews by {identifier}")
366
- print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
367
-
368
- # Generate monthly table UNION statements for review period
369
- review_union = generate_table_union_statements(start_date, end_date)
370
-
371
- # Generate monthly table UNION statements for PR status (lookback)
372
- status_start = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
373
- status_union = generate_table_union_statements(status_start, end_date)
374
-
375
- # Build comprehensive query with CTEs for PR status
376
- query = f"""
377
- WITH review_events AS (
378
- SELECT
379
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
380
- COALESCE(
381
- JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at'),
382
- CAST(created_at AS STRING)
383
- ) as reviewed_at,
384
- actor.login as reviewer,
385
- created_at
386
- FROM (
387
- {review_union}
388
- )
389
- WHERE type = 'PullRequestReviewEvent'
390
- AND actor.login = @identifier
391
- AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IS NOT NULL
392
- ),
393
- pr_status AS (
394
- SELECT
395
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
396
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged_at') as merged_at,
397
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
398
- created_at
399
- FROM (
400
- {status_union}
401
- )
402
- WHERE type = 'PullRequestEvent'
403
- AND JSON_EXTRACT_SCALAR(payload, '$.action') = 'closed'
404
- AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IN (
405
- SELECT DISTINCT url FROM review_events
406
- )
407
- QUALIFY ROW_NUMBER() OVER (PARTITION BY url ORDER BY created_at DESC) = 1
408
- )
409
- SELECT DISTINCT
410
- re.url,
411
- re.reviewed_at,
412
- re.created_at,
413
- ps.merged_at,
414
- ps.closed_at
415
- FROM review_events re
416
- LEFT JOIN pr_status ps ON re.url = ps.url
417
- ORDER BY re.reviewed_at DESC
418
- """
419
-
420
- job_config = bigquery.QueryJobConfig(
421
- query_parameters=[
422
- bigquery.ScalarQueryParameter("identifier", "STRING", identifier)
423
- ]
424
- )
425
-
426
- # Calculate months for logging
427
- review_months = ((end_date.year - start_date.year) * 12 + end_date.month - start_date.month + 1)
428
- status_months = ((end_date.year - status_start.year) * 12 + end_date.month - status_start.month + 1)
429
- print(f" Querying {review_months} monthly review tables and {status_months} monthly status tables...")
430
-
431
- try:
432
- query_job = client.query(query, job_config=job_config)
433
- results = list(query_job.result())
434
-
435
- print(f" ✓ Found {len(results)} review events")
436
- return results
437
-
438
- except Exception as e:
439
- print(f" ✗ BigQuery error: {str(e)}")
440
- return []
441
-
442
-
443
- def extract_review_metadata_from_bigquery(review_row):
444
- """
445
- Extract minimal PR review metadata from BigQuery row.
446
-
447
- Args:
448
- review_row: BigQuery row from PullRequestReviewEvent query
449
-
450
- Returns:
451
- Dictionary with review metadata containing:
452
- - url: PR URL
453
- - reviewed_at: Review timestamp
454
- - merged_at: Merge timestamp (if merged, else None)
455
- - closed_at: Close timestamp (if closed, else None)
456
- """
457
- url = review_row.url
458
- reviewed_at = review_row.reviewed_at or review_row.created_at
459
- merged_at = getattr(review_row, 'merged_at', None)
460
- closed_at = getattr(review_row, 'closed_at', None)
461
-
462
- # Convert to ISO format if datetime and normalize
463
- if hasattr(reviewed_at, 'isoformat'):
464
- reviewed_at = reviewed_at.isoformat()
465
- reviewed_at = normalize_date_format(reviewed_at) if reviewed_at else None
466
-
467
- if merged_at and hasattr(merged_at, 'isoformat'):
468
- merged_at = merged_at.isoformat()
469
- merged_at = normalize_date_format(merged_at) if merged_at else None
470
-
471
- if closed_at and hasattr(closed_at, 'isoformat'):
472
- closed_at = closed_at.isoformat()
473
- closed_at = normalize_date_format(closed_at) if closed_at else None
474
-
475
- return {
476
- 'url': url,
477
- 'reviewed_at': reviewed_at,
478
- 'merged_at': merged_at,
479
- 'closed_at': closed_at
480
- }
481
-
482
-
483
  # =============================================================================
484
  # GITHUB API OPERATIONS
485
  # =============================================================================
@@ -574,550 +171,6 @@ def validate_github_username(identifier):
574
  except Exception as e:
575
  return False, f"Validation error: {str(e)}"
576
 
577
- def extract_review_metadata(pr):
578
- """
579
- Extract minimal PR review metadata for efficient storage.
580
- Only keeps essential fields: url, reviewed_at, merged_at, closed_at.
581
- Note: agent_name is not stored as it's inferred from the folder structure.
582
-
583
- Status can be derived from the timestamps:
584
- - merged_at: Timestamp if PR was merged, None otherwise
585
- - closed_at: Timestamp if PR was closed (either merged or just closed), None otherwise
586
-
587
- Merged PR = PR that was merged (merged_at is not None)
588
- Rejected PR = PR that was closed without merging (closed_at is not None but merged_at is None)
589
- Open PR = PR still open (both merged_at and closed_at are None)
590
- """
591
- # Extract PR metadata from search results
592
- # The GitHub search API returns PR data from /search/issues endpoint
593
- url = pr.get('url')
594
- created_at = pr.get('created_at')
595
- closed_at = pr.get('closed_at')
596
-
597
- # Check if PR has pull_request field (indicates it's a PR, not an issue)
598
- pull_request_data = pr.get('pull_request', {})
599
- merged_at = pull_request_data.get('merged_at') if pull_request_data else None
600
-
601
- return {
602
- 'url': url,
603
- 'reviewed_at': created_at, # When the PR was created (agent reviewed it)
604
- 'merged_at': merged_at,
605
- 'closed_at': closed_at
606
- }
607
-
608
-
609
- def get_pr_status_from_metadata(review_meta):
610
- """
611
- Derive PR status from merged_at and closed_at fields.
612
-
613
- Args:
614
- review_meta: Dictionary containing merged_at and closed_at fields
615
-
616
- Returns:
617
- str: 'merged', 'closed', or 'open'
618
- """
619
- merged_at = review_meta.get('merged_at')
620
- closed_at = review_meta.get('closed_at')
621
-
622
- # If merged_at is set (not None and not False), PR is merged
623
- if merged_at:
624
- return 'merged'
625
- # If closed_at is set but not merged, PR is closed without merging
626
- elif closed_at:
627
- return 'closed'
628
- # Otherwise, PR is still open
629
- else:
630
- return 'open'
631
-
632
-
633
- def calculate_review_stats_from_metadata(metadata_list):
634
- """
635
- Calculate statistics from a list of review metadata (lightweight objects).
636
- Works with minimal metadata: url, reviewed_at, merged_at, closed_at.
637
-
638
- Returns a dictionary with comprehensive review metrics.
639
-
640
- Acceptance Rate is calculated as:
641
- merged PRs / (merged PRs + rejected PRs) * 100
642
-
643
- Merged PRs = PRs that were merged (merged_at is not None)
644
- Rejected PRs = PRs that were closed without merging (closed_at is not None but merged_at is None)
645
- Pending PRs = PRs still open (both merged_at and closed_at are None) - excluded from acceptance rate
646
- """
647
- total_reviews = len(metadata_list)
648
-
649
- # Count merged PRs (merged_at is set)
650
- merged_prs = sum(1 for review_meta in metadata_list
651
- if get_pr_status_from_metadata(review_meta) == 'merged')
652
-
653
- # Count rejected PRs (closed without merging)
654
- rejected_prs = sum(1 for review_meta in metadata_list
655
- if get_pr_status_from_metadata(review_meta) == 'closed')
656
-
657
- # Count pending PRs (still open)
658
- pending_prs = sum(1 for review_meta in metadata_list
659
- if get_pr_status_from_metadata(review_meta) == 'open')
660
-
661
- # Calculate acceptance rate (exclude pending PRs)
662
- completed_prs = merged_prs + rejected_prs
663
- acceptance_rate = (merged_prs / completed_prs * 100) if completed_prs > 0 else 0
664
-
665
- return {
666
- 'total_reviews': total_reviews,
667
- 'merged_prs': merged_prs,
668
- 'pending_prs': pending_prs,
669
- 'acceptance_rate': round(acceptance_rate, 2),
670
- }
671
-
672
-
673
- def calculate_monthly_metrics_by_agent(top_n=None):
674
- """
675
- Calculate monthly metrics for all agents (or top N agents) for visualization.
676
- Loads data directly from SWE-Arena/review_metadata dataset.
677
-
678
- Args:
679
- top_n: If specified, only return metrics for the top N agents by total reviews.
680
- Agents are ranked by their total review count across all months.
681
-
682
- Returns:
683
- dict: {
684
- 'agents': list of agent names,
685
- 'months': list of month labels (e.g., '2025-01'),
686
- 'data': {
687
- agent_name: {
688
- 'acceptance_rates': list of acceptance rates by month,
689
- 'total_reviews': list of review counts by month,
690
- 'merged_prs': list of merged PR counts by month,
691
- }
692
- }
693
- }
694
- """
695
- # Load ALL agents from HuggingFace agents repo
696
- agents = load_agents_from_hf()
697
-
698
- # Create mapping from agent_identifier to agent_name
699
- identifier_to_name = {agent.get('github_identifier'): agent.get('name') for agent in agents if agent.get('github_identifier')}
700
-
701
- # Load all review metadata from review_metadata dataset
702
- all_metadata = load_review_metadata()
703
-
704
- if not all_metadata:
705
- return {'agents': [], 'months': [], 'data': {}}
706
-
707
- # Group by agent and month
708
- agent_month_data = defaultdict(lambda: defaultdict(list))
709
-
710
- for review_meta in all_metadata:
711
- agent_identifier = review_meta.get('agent_identifier')
712
- reviewed_at = review_meta.get('reviewed_at')
713
-
714
- if not agent_identifier or not reviewed_at:
715
- continue
716
-
717
- # Get agent_name from identifier
718
- agent_name = identifier_to_name.get(agent_identifier, agent_identifier)
719
-
720
- try:
721
- dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
722
- month_key = f"{dt.year}-{dt.month:02d}"
723
- agent_month_data[agent_name][month_key].append(review_meta)
724
- except Exception as e:
725
- print(f"Warning: Could not parse date '{reviewed_at}': {e}")
726
- continue
727
-
728
- # Get all unique months and sort them
729
- all_months = set()
730
- for agent_data in agent_month_data.values():
731
- all_months.update(agent_data.keys())
732
- months = sorted(list(all_months))
733
-
734
- # Calculate metrics for each agent and month
735
- result_data = {}
736
- for agent_name, month_dict in agent_month_data.items():
737
- acceptance_rates = []
738
- total_reviews_list = []
739
- merged_prs_list = []
740
-
741
- for month in months:
742
- reviews_in_month = month_dict.get(month, [])
743
-
744
- # Count merged PRs (merged_at is set)
745
- merged_count = sum(1 for review in reviews_in_month
746
- if get_pr_status_from_metadata(review) == 'merged')
747
-
748
- # Count rejected PRs (closed without merging)
749
- rejected_count = sum(1 for review in reviews_in_month
750
- if get_pr_status_from_metadata(review) == 'closed')
751
-
752
- # Total reviews created in this month
753
- total_count = len(reviews_in_month)
754
-
755
- # Calculate acceptance rate (exclude pending PRs)
756
- completed_count = merged_count + rejected_count
757
- acceptance_rate = (merged_count / completed_count * 100) if completed_count > 0 else None
758
-
759
- acceptance_rates.append(acceptance_rate)
760
- total_reviews_list.append(total_count)
761
- merged_prs_list.append(merged_count)
762
-
763
- result_data[agent_name] = {
764
- 'acceptance_rates': acceptance_rates,
765
- 'total_reviews': total_reviews_list,
766
- 'merged_prs': merged_prs_list,
767
- }
768
-
769
- # Filter to top N agents if specified
770
- agents_list = sorted(list(agent_month_data.keys()))
771
- if top_n is not None and top_n > 0:
772
- # Calculate total reviews for each agent across all months
773
- agent_totals = []
774
- for agent_name in agents_list:
775
- total_reviews = sum(result_data[agent_name]['total_reviews'])
776
- agent_totals.append((agent_name, total_reviews))
777
-
778
- # Sort by total reviews (descending) and take top N
779
- agent_totals.sort(key=lambda x: x[1], reverse=True)
780
- top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
781
-
782
- # Filter result_data to only include top agents
783
- result_data = {agent: result_data[agent] for agent in top_agents if agent in result_data}
784
- agents_list = top_agents
785
-
786
- return {
787
- 'agents': agents_list,
788
- 'months': months,
789
- 'data': result_data
790
- }
791
-
792
-
793
- # =============================================================================
794
- # REVIEW METADATA STORAGE & RETRIEVAL
795
- # =============================================================================
796
-
797
- def group_metadata_by_date(metadata_list):
798
- """
799
- Group review metadata by exact date (year.month.day) for efficient daily storage.
800
- Returns dict: {(year, month, day): [metadata_list]}
801
- """
802
- grouped = defaultdict(list)
803
-
804
- for review_meta in metadata_list:
805
- reviewed_at = review_meta.get('reviewed_at')
806
- if not reviewed_at:
807
- continue
808
-
809
- try:
810
- dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
811
- key = (dt.year, dt.month, dt.day)
812
- grouped[key].append(review_meta)
813
- except Exception as e:
814
- print(f"Warning: Could not parse date '{reviewed_at}': {e}")
815
-
816
- return dict(grouped)
817
-
818
-
819
- def save_review_metadata_to_hf(metadata_list, agent_identifier):
820
- """
821
- Save review metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
822
- Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's reviews.
823
-
824
- This function APPENDS new metadata and DEDUPLICATES by URL.
825
- Uses batch upload to avoid rate limit (uploads entire folder in single commit).
826
-
827
- Args:
828
- metadata_list: List of review metadata dictionaries
829
- agent_identifier: GitHub identifier of the agent (used as folder name)
830
- """
831
- import tempfile
832
- import shutil
833
-
834
- try:
835
- token = get_hf_token()
836
- if not token:
837
- raise Exception("No HuggingFace token found")
838
-
839
- api = HfApi()
840
-
841
- # Group by exact date (year, month, day)
842
- grouped = group_metadata_by_date(metadata_list)
843
-
844
- # Create a temporary directory for batch upload
845
- temp_dir = tempfile.mkdtemp()
846
- agent_folder = os.path.join(temp_dir, agent_identifier)
847
- os.makedirs(agent_folder, exist_ok=True)
848
-
849
- try:
850
- print(f"📦 Preparing batch upload for {len(grouped)} daily files...")
851
-
852
- # Process each daily file
853
- for (review_year, month, day), day_metadata in grouped.items():
854
- filename = f"{agent_identifier}/{review_year}.{month:02d}.{day:02d}.jsonl"
855
- local_filename = os.path.join(agent_folder, f"{review_year}.{month:02d}.{day:02d}.jsonl")
856
-
857
- # Download existing file if it exists
858
- existing_metadata = []
859
- try:
860
- file_path = hf_hub_download(
861
- repo_id=REVIEW_METADATA_REPO,
862
- filename=filename,
863
- repo_type="dataset",
864
- token=token
865
- )
866
- existing_metadata = load_jsonl(file_path)
867
- print(f" Found {len(existing_metadata)} existing reviews in {filename}")
868
- except Exception:
869
- print(f" Creating new file: {filename}")
870
-
871
- # Merge and deduplicate by URL
872
- existing_by_url = {meta['url']: meta for meta in existing_metadata if meta.get('url')}
873
- new_by_url = {meta['url']: meta for meta in day_metadata if meta.get('url')}
874
-
875
- # Update with new data (new data overwrites old)
876
- existing_by_url.update(new_by_url)
877
- merged_metadata = list(existing_by_url.values())
878
-
879
- # Save to temp directory
880
- save_jsonl(local_filename, merged_metadata)
881
- print(f" Prepared {len(merged_metadata)} reviews for {filename}")
882
-
883
- # Upload entire folder using upload_folder (single commit per agent)
884
- print(f"📤 Uploading {len(grouped)} files...")
885
- upload_folder_with_backoff(
886
- api=api,
887
- folder_path=temp_dir,
888
- repo_id=REVIEW_METADATA_REPO,
889
- repo_type="dataset",
890
- commit_message=f"Update review metadata for {agent_identifier}"
891
- )
892
- print(f" ✓ Batch upload complete for {agent_identifier}")
893
-
894
- return True
895
-
896
- finally:
897
- # Always clean up temp directory
898
- if os.path.exists(temp_dir):
899
- shutil.rmtree(temp_dir)
900
-
901
- except Exception as e:
902
- print(f"✗ Error saving review metadata: {str(e)}")
903
- import traceback
904
- traceback.print_exc()
905
- return False
906
-
907
-
908
- def load_review_metadata():
909
- """
910
- Load review metadata from the last LEADERBOARD_TIME_FRAME_DAYS.
911
-
912
- Structure: [agent_identifier]/YYYY.MM.DD.jsonl
913
-
914
- Returns:
915
- List of dictionaries with 'agent_identifier' added to each review metadata.
916
- Only includes reviews from the last LEADERBOARD_TIME_FRAME_DAYS.
917
- """
918
- # Calculate cutoff date based on LEADERBOARD_TIME_FRAME_DAYS
919
- current_time = datetime.now(timezone.utc)
920
- cutoff_date = current_time - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
921
-
922
- try:
923
- api = HfApi()
924
- token = get_hf_token()
925
-
926
- # List all files in the repository
927
- files = list_repo_files_with_backoff(api=api, repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
928
-
929
- # Filter for files matching the pattern: [agent_identifier]/YYYY.MM.DD.jsonl
930
- # AND within the time frame (parse date from filename)
931
- time_frame_files = []
932
- for f in files:
933
- if f.endswith('.jsonl'):
934
- parts = f.split('/')
935
- if len(parts) == 2: # [agent_identifier]/YYYY.MM.DD.jsonl
936
- filename = parts[1]
937
- # Parse date from filename: YYYY.MM.DD.jsonl
938
- try:
939
- date_part = filename.replace('.jsonl', '') # Get YYYY.MM.DD
940
- date_components = date_part.split('.')
941
- if len(date_components) == 3:
942
- file_year, file_month, file_day = map(int, date_components)
943
- file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
944
-
945
- # Only include files within the time frame
946
- if file_date >= cutoff_date:
947
- time_frame_files.append(f)
948
- except Exception:
949
- # If we can't parse the date, skip this file
950
- continue
951
-
952
- print(f"📥 Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files across all agents)...")
953
-
954
- all_metadata = []
955
- agent_identifiers_found = set()
956
-
957
- for filename in time_frame_files:
958
- try:
959
- # Extract agent_identifier from path (first part)
960
- # Format: agent_identifier/YYYY.MM.DD.jsonl
961
- parts = filename.split('/')
962
- if len(parts) != 2:
963
- print(f" Warning: Unexpected filename format: {filename}")
964
- continue
965
-
966
- agent_identifier = parts[0]
967
- agent_identifiers_found.add(agent_identifier)
968
-
969
- file_path = hf_hub_download_with_backoff(
970
- repo_id=REVIEW_METADATA_REPO,
971
- filename=filename,
972
- repo_type="dataset",
973
- token=token
974
- )
975
- day_metadata = load_jsonl(file_path)
976
-
977
- # Add agent_identifier and filter by time frame (double-check)
978
- filtered_count = 0
979
- for review_meta in day_metadata:
980
- # Validate review date is within time frame
981
- reviewed_at = review_meta.get('reviewed_at')
982
- if reviewed_at:
983
- try:
984
- dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
985
- if dt < cutoff_date:
986
- continue # Skip reviews older than time frame
987
- except Exception:
988
- pass # Keep reviews with unparseable dates
989
-
990
- review_meta['agent_identifier'] = agent_identifier
991
- all_metadata.append(review_meta)
992
- filtered_count += 1
993
-
994
- print(f" ✓ Loaded {filtered_count} reviews from {filename}")
995
- except Exception as e:
996
- print(f" Warning: Could not load {filename}: {str(e)}")
997
-
998
- print(f"✓ Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")
999
-
1000
- return all_metadata
1001
-
1002
- except Exception as e:
1003
- print(f"✗ Error loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days: {str(e)}")
1004
- return []
1005
-
1006
-
1007
- def get_latest_review_date_for_agent(agent_identifier):
1008
- """
1009
- Get the latest review creation date for an agent from stored metadata.
1010
- Used for incremental updates - only fetch reviews newer than this date.
1011
-
1012
- Structure: [agent_identifier]/YYYY.MM.DD.jsonl
1013
-
1014
- Args:
1015
- agent_identifier: GitHub identifier of the agent
1016
-
1017
- Returns:
1018
- datetime or None if no existing reviews found.
1019
- """
1020
- try:
1021
- api = HfApi()
1022
- token = get_hf_token()
1023
-
1024
- # List all files in the repository
1025
- files = list_repo_files_with_backoff(api=api, repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
1026
-
1027
- # Filter for files in this agent's folder
1028
- # New structure: [agent_identifier]/YYYY.MM.DD.jsonl
1029
- agent_pattern = f"{agent_identifier}/"
1030
- agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]
1031
-
1032
- if not agent_files:
1033
- return None
1034
-
1035
- # Find latest created_at across all files
1036
- latest_date = None
1037
- for filename in agent_files:
1038
- try:
1039
- file_path = hf_hub_download_with_backoff(
1040
- repo_id=REVIEW_METADATA_REPO,
1041
- filename=filename,
1042
- repo_type="dataset",
1043
- token=token
1044
- )
1045
- metadata = load_jsonl(file_path)
1046
-
1047
- for review_meta in metadata:
1048
- reviewed_at = review_meta.get("reviewed_at")
1049
- if reviewed_at:
1050
- try:
1051
- dt = datetime.fromisoformat(reviewed_at.replace("Z", "+00:00"))
1052
- if latest_date is None or dt > latest_date:
1053
- latest_date = dt
1054
- except Exception:
1055
- continue
1056
- except Exception:
1057
- continue
1058
-
1059
- return latest_date
1060
-
1061
- except Exception:
1062
- return None
1063
-
1064
-
1065
- def get_daily_files_last_time_frame(agent_identifier):
1066
- """
1067
- Get list of daily file paths for an agent from the configured time frame.
1068
-
1069
- Args:
1070
- agent_identifier: GitHub identifier of the agent
1071
-
1072
- Returns:
1073
- List of file paths in format: [agent_identifier]/YYYY.MM.DD.jsonl
1074
- """
1075
- try:
1076
- api = HfApi()
1077
- token = get_hf_token()
1078
-
1079
- # Calculate date range using configured time frame
1080
- today = datetime.now(timezone.utc)
1081
- cutoff_date = today - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
1082
-
1083
- # List all files in the repository
1084
- files = list_repo_files_with_backoff(api=api, repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
1085
-
1086
- # Filter for files in this agent's folder
1087
- agent_pattern = f"{agent_identifier}/"
1088
- agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]
1089
-
1090
- # Filter by date range (extract date from filename)
1091
- recent_files = []
1092
- for filename in agent_files:
1093
- try:
1094
- # Extract date from filename: YYYY.MM.DD.jsonl
1095
- parts = filename.split('/')
1096
- if len(parts) != 2:
1097
- continue
1098
-
1099
- date_part = parts[1].replace('.jsonl', '') # Get YYYY.MM.DD
1100
- date_components = date_part.split('.')
1101
- if len(date_components) != 3:
1102
- continue
1103
-
1104
- file_year, file_month, file_day = map(int, date_components)
1105
- file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
1106
-
1107
- # Include if within configured time frame
1108
- if cutoff_date <= file_date <= today:
1109
- recent_files.append(filename)
1110
- except Exception:
1111
- continue
1112
-
1113
- return recent_files
1114
-
1115
- except Exception as e:
1116
- print(f"Error getting daily files: {str(e)}")
1117
- return []
1118
-
1119
-
1120
-
1121
 
1122
  # =============================================================================
1123
  # HUGGINGFACE DATASET OPERATIONS
@@ -1163,7 +216,7 @@ def load_agents_from_hf():
1163
  print(f"Warning: Could not load {json_file}: {str(e)}")
1164
  continue
1165
 
1166
- print(f"Loaded {len(agents)} agents from HuggingFace")
1167
  return agents
1168
 
1169
  except Exception as e:
@@ -1171,8 +224,6 @@ def load_agents_from_hf():
1171
  return None
1172
 
1173
 
1174
-
1175
-
1176
  def get_hf_token():
1177
  """Get HuggingFace token from environment variables."""
1178
  token = os.getenv('HF_TOKEN')
@@ -1209,18 +260,18 @@ def upload_with_retry(api, path_or_fileobj, path_in_repo, repo_id, repo_type, to
1209
  token=token
1210
  )
1211
  if attempt > 0:
1212
- print(f" Upload succeeded on attempt {attempt + 1}/{max_retries}")
1213
  return True
1214
 
1215
  except Exception as e:
1216
  if attempt < max_retries - 1:
1217
  wait_time = delay + random.uniform(0, 1.0)
1218
- print(f" ⚠️ Upload failed (attempt {attempt + 1}/{max_retries}): {str(e)}")
1219
- print(f" Retrying in {wait_time:.1f} seconds...")
1220
  time.sleep(wait_time)
1221
  delay = min(delay * 2, 60.0) # Exponential backoff, max 60s
1222
  else:
1223
- print(f" Upload failed after {max_retries} attempts: {str(e)}")
1224
  raise
1225
 
1226
 
@@ -1250,64 +301,7 @@ def save_agent_to_hf(data):
1250
  repo_type="dataset",
1251
  token=token
1252
  )
1253
- print(f"Saved agent to HuggingFace: {filename}")
1254
- return True
1255
- finally:
1256
- # Always clean up local file, even if upload fails
1257
- if os.path.exists(filename):
1258
- os.remove(filename)
1259
-
1260
- except Exception as e:
1261
- print(f"✗ Error saving agent: {str(e)}")
1262
- return False
1263
-
1264
-
1265
- def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
1266
- """
1267
- Save leaderboard data and monthly metrics to HuggingFace dataset as swe-review.json.
1268
-
1269
- Args:
1270
- leaderboard_dict: Dictionary of agent stats from construct_leaderboard_from_metadata()
1271
- monthly_metrics: Monthly metrics data from calculate_monthly_metrics_by_agent()
1272
-
1273
- Returns:
1274
- bool: True if successful, False otherwise
1275
- """
1276
- try:
1277
- api = HfApi()
1278
- token = get_hf_token()
1279
-
1280
- if not token:
1281
- raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
1282
-
1283
- filename = "swe-review.json"
1284
-
1285
- # Combine leaderboard and monthly metrics
1286
- combined_data = {
1287
- 'last_updated': datetime.now(timezone.utc).isoformat(),
1288
- 'leaderboard': leaderboard_dict,
1289
- 'monthly_metrics': monthly_metrics,
1290
- 'metadata': {
1291
- 'leaderboard_time_frame_days': LEADERBOARD_TIME_FRAME_DAYS,
1292
- 'update_time_frame_days': UPDATE_TIME_FRAME_DAYS
1293
- }
1294
- }
1295
-
1296
- # Save locally first
1297
- with open(filename, 'w') as f:
1298
- json.dump(combined_data, f, indent=2)
1299
-
1300
- try:
1301
- # Upload to HuggingFace
1302
- upload_with_retry(
1303
- api=api,
1304
- path_or_fileobj=filename,
1305
- path_in_repo=filename,
1306
- repo_id=LEADERBOARD_REPO,
1307
- repo_type="dataset",
1308
- token=token
1309
- )
1310
- print(f"✓ Saved leaderboard data to HuggingFace: {filename}")
1311
  return True
1312
  finally:
1313
  # Always clean up local file, even if upload fails
@@ -1315,9 +309,7 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
1315
  os.remove(filename)
1316
 
1317
  except Exception as e:
1318
- print(f"Error saving leaderboard data: {str(e)}")
1319
- import traceback
1320
- traceback.print_exc()
1321
  return False
1322
 
1323
 
@@ -1346,205 +338,15 @@ def load_leaderboard_data_from_hf():
1346
  data = json.load(f)
1347
 
1348
  last_updated = data.get('last_updated', 'Unknown')
1349
- print(f"Loaded leaderboard data from HuggingFace (last updated: {last_updated})")
1350
 
1351
  return data
1352
 
1353
  except Exception as e:
1354
- print(f"⚠️ Could not load leaderboard data from HuggingFace: {str(e)}")
1355
  return None
1356
 
1357
 
1358
- def save_leaderboard_and_metrics_to_hf():
1359
- """
1360
- Creates a comprehensive JSON file with both leaderboard stats and monthly metrics.
1361
- If the file exists, it will be overwritten.
1362
-
1363
- Returns:
1364
- bool: True if successful, False otherwise
1365
- """
1366
- import io
1367
-
1368
- try:
1369
- token = get_hf_token()
1370
- if not token:
1371
- raise Exception("No HuggingFace token found")
1372
-
1373
- api = HfApi(token=token)
1374
-
1375
- print(f"\n{'='*80}")
1376
- print(f"📊 Preparing leaderboard and metrics data for upload...")
1377
- print(f"{'='*80}\n")
1378
-
1379
- # Get leaderboard data from review metadata
1380
- print(" Constructing leaderboard data from review metadata...")
1381
- leaderboard_data = construct_leaderboard_from_metadata()
1382
-
1383
- # Get monthly metrics data (all agents, not just top N)
1384
- print(" Calculating monthly metrics from review metadata...")
1385
- monthly_metrics = calculate_monthly_metrics_by_agent(top_n=None)
1386
-
1387
- # Combine into a single structure
1388
- combined_data = {
1389
- "leaderboard": leaderboard_data,
1390
- "monthly_metrics": monthly_metrics,
1391
- "metadata": {
1392
- "last_updated": datetime.now(timezone.utc).isoformat(),
1393
- "time_frame_days": LEADERBOARD_TIME_FRAME_DAYS,
1394
- "total_agents": len(leaderboard_data)
1395
- }
1396
- }
1397
-
1398
- print(f" Leaderboard entries: {len(leaderboard_data)}")
1399
- print(f" Monthly metrics for: {len(monthly_metrics['agents'])} agents")
1400
- print(f" Time frame: {LEADERBOARD_TIME_FRAME_DAYS} days")
1401
-
1402
- # Convert to JSON and create file-like object
1403
- json_content = json.dumps(combined_data, indent=2)
1404
- file_like_object = io.BytesIO(json_content.encode('utf-8'))
1405
-
1406
- # Upload to HuggingFace (will overwrite if exists)
1407
- print(f"\n🤗 Uploading to {LEADERBOARD_REPO}...")
1408
- upload_file_with_backoff(
1409
- api=api,
1410
- path_or_fileobj=file_like_object,
1411
- path_in_repo="swe-review.json",
1412
- repo_id=LEADERBOARD_REPO,
1413
- repo_type="dataset",
1414
- token=token,
1415
- commit_message=f"Update leaderboard data - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC"
1416
- )
1417
-
1418
- print(f" ✓ Successfully uploaded swe-review.json")
1419
- print(f"{'='*80}\n")
1420
-
1421
- return True
1422
-
1423
- except Exception as e:
1424
- print(f"✗ Error saving leaderboard and metrics: {str(e)}")
1425
- import traceback
1426
- traceback.print_exc()
1427
- return False
1428
-
1429
-
1430
-
1431
- # =============================================================================
1432
- # DATA MANAGEMENT
1433
- # =============================================================================
1434
-
1435
- def mine_all_agents():
1436
- """
1437
- Mine review metadata for all agents within UPDATE_TIME_FRAME_DAYS and save to HuggingFace.
1438
- Uses BATCHED BigQuery queries for all agents (efficient approach).
1439
- """
1440
- # Load agent metadata from HuggingFace
1441
- agents = load_agents_from_hf()
1442
- if not agents:
1443
- print("No agents found in HuggingFace dataset")
1444
- return
1445
-
1446
- # Extract all identifiers
1447
- identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
1448
- if not identifiers:
1449
- print("No valid agent identifiers found")
1450
- return
1451
-
1452
- print(f"\n{'='*80}")
1453
- print(f"Starting review metadata mining for {len(identifiers)} agents")
1454
- print(f"Time frame: Last {UPDATE_TIME_FRAME_DAYS} days")
1455
- print(f"Data source: BigQuery + GitHub Archive (BATCHED QUERIES)")
1456
- print(f"{'='*80}\n")
1457
-
1458
- # Initialize BigQuery client
1459
- try:
1460
- client = get_bigquery_client()
1461
- except Exception as e:
1462
- print(f"✗ Failed to initialize BigQuery client: {str(e)}")
1463
- return
1464
-
1465
- # Define time range: past UPDATE_TIME_FRAME_DAYS (excluding today)
1466
- current_time = datetime.now(timezone.utc)
1467
- end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
1468
- start_date = end_date - timedelta(days=UPDATE_TIME_FRAME_DAYS)
1469
-
1470
- try:
1471
- # Use batched approach for better performance
1472
- # upload_immediately=True means each batch uploads to HuggingFace right after BigQuery completes
1473
- all_metadata = fetch_all_pr_metadata_batched(
1474
- client, identifiers, start_date, end_date, batch_size=100, upload_immediately=True
1475
- )
1476
-
1477
- # Calculate summary statistics
1478
- total_prs = sum(len(metadata_list) for metadata_list in all_metadata.values())
1479
- agents_with_data = sum(1 for metadata_list in all_metadata.values() if metadata_list)
1480
-
1481
- print(f"\n{'='*80}")
1482
- print(f"✅ BigQuery mining and upload complete!")
1483
- print(f" Total agents: {len(agents)}")
1484
- print(f" Agents with data: {agents_with_data}")
1485
- print(f" Total PRs found: {total_prs}")
1486
- print(f"{'='*80}\n")
1487
-
1488
- except Exception as e:
1489
- print(f"✗ Error during BigQuery fetch: {str(e)}")
1490
- import traceback
1491
- traceback.print_exc()
1492
- return
1493
-
1494
- # After mining is complete, save leaderboard and metrics to HuggingFace
1495
- print(f"📤 Uploading leaderboard and metrics data...")
1496
- if save_leaderboard_and_metrics_to_hf():
1497
- print(f"✓ Leaderboard and metrics successfully uploaded to {LEADERBOARD_REPO}")
1498
- else:
1499
- print(f"⚠️ Failed to upload leaderboard and metrics data")
1500
-
1501
-
1502
- def construct_leaderboard_from_metadata():
1503
- """
1504
- Construct leaderboard from stored review metadata instead of fetching all reviews.
1505
- Much more memory-efficient and faster.
1506
-
1507
- Returns dictionary of agent stats.
1508
- """
1509
- print("📊 Constructing leaderboard from review metadata...")
1510
-
1511
- # Load agents
1512
- agents = load_agents_from_hf()
1513
- if not agents:
1514
- print("⚠️ No agents found")
1515
- return {}
1516
-
1517
- print(f"✓ Loaded {len(agents)} agents")
1518
-
1519
- # Load all review metadata
1520
- all_metadata = load_review_metadata()
1521
- print(f"✓ Loaded {len(all_metadata)} review metadata entries")
1522
-
1523
- cache_dict = {}
1524
-
1525
- for agent in agents:
1526
- identifier = agent.get('github_identifier')
1527
- agent_name = agent.get('name', 'Unknown')
1528
-
1529
- # Filter metadata for this agent
1530
- bot_metadata = [review for review in all_metadata if review.get("agent_identifier") == identifier]
1531
-
1532
- # Calculate stats
1533
- stats = calculate_review_stats_from_metadata(bot_metadata)
1534
-
1535
- cache_dict[identifier] = {
1536
- 'name': agent_name,
1537
- 'name': agent_name, # Store both for compatibility
1538
- 'website': agent.get('website', 'N/A'),
1539
- 'github_identifier': identifier,
1540
- **stats
1541
- }
1542
-
1543
- print(f"✓ Constructed cache with {len(cache_dict)} agent entries")
1544
-
1545
- return cache_dict
1546
-
1547
-
1548
  # =============================================================================
1549
  # UI FUNCTIONS
1550
  # =============================================================================
@@ -1560,36 +362,47 @@ def create_monthly_metrics_plot(top_n=5):
1560
  Args:
1561
  top_n: Number of top agents to show (default: 5)
1562
  """
1563
- # Try loading from saved dataset first
1564
  saved_data = load_leaderboard_data_from_hf()
1565
 
1566
- if saved_data and 'monthly_metrics' in saved_data:
1567
- metrics = saved_data['monthly_metrics']
1568
- print(f"📈 Loaded monthly metrics from saved dataset")
1569
-
1570
- # Apply top_n filter if specified
1571
- if top_n is not None and top_n > 0 and metrics.get('agents'):
1572
- # Calculate total reviews for each agent
1573
- agent_totals = []
1574
- for agent_name in metrics['agents']:
1575
- agent_data = metrics['data'].get(agent_name, {})
1576
- total_reviews = sum(agent_data.get('total_reviews', []))
1577
- agent_totals.append((agent_name, total_reviews))
1578
-
1579
- # Sort by total reviews and take top N
1580
- agent_totals.sort(key=lambda x: x[1], reverse=True)
1581
- top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
1582
-
1583
- # Filter metrics to only include top agents
1584
- metrics = {
1585
- 'agents': top_agents,
1586
- 'months': metrics['months'],
1587
- 'data': {agent: metrics['data'][agent] for agent in top_agents if agent in metrics['data']}
1588
- }
1589
- else:
1590
- # Fallback: calculate from metadata if saved data doesn't exist
1591
- print(f"📈 Saved data not available, calculating monthly metrics from metadata...")
1592
- metrics = calculate_monthly_metrics_by_agent(top_n=top_n)
 
 
 
 
 
 
 
 
 
 
 
1593
 
1594
  if not metrics['agents'] or not metrics['months']:
1595
  # Return an empty figure with a message
@@ -1712,24 +525,23 @@ def create_monthly_metrics_plot(top_n=5):
1712
  def get_leaderboard_dataframe():
1713
  """
1714
  Load leaderboard from saved dataset and convert to pandas DataFrame for display.
1715
- Falls back to constructing from metadata if saved data is not available.
1716
  Returns formatted DataFrame sorted by total reviews.
1717
  """
1718
- # Try loading from saved dataset first
1719
  saved_data = load_leaderboard_data_from_hf()
1720
 
1721
- if saved_data and 'leaderboard' in saved_data:
1722
- cache_dict = saved_data['leaderboard']
1723
- print(f"📊 Loaded leaderboard from saved dataset (last updated: {saved_data.get('last_updated', 'Unknown')})")
1724
- else:
1725
- # Fallback: construct from metadata if saved data doesn't exist
1726
- print(f"📊 Saved data not available, constructing leaderboard from metadata...")
1727
- cache_dict = construct_leaderboard_from_metadata()
1728
 
1729
- print(f"📊 Cache dict size: {len(cache_dict)}")
 
 
1730
 
1731
  if not cache_dict:
1732
- print("⚠️ WARNING: cache_dict is empty!")
1733
  # Return empty DataFrame with correct columns if no data
1734
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
1735
  return pd.DataFrame(columns=column_names)
@@ -1754,8 +566,8 @@ def get_leaderboard_dataframe():
1754
  data.get('acceptance_rate', 0.0),
1755
  ])
1756
 
1757
- print(f"📉 Filtered out {filtered_count} agents with 0 reviews")
1758
- print(f"📈 Leaderboard will show {len(rows)} agents")
1759
 
1760
  # Create DataFrame
1761
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
@@ -1771,7 +583,7 @@ def get_leaderboard_dataframe():
1771
  if "Total Reviews" in df.columns and not df.empty:
1772
  df = df.sort_values(by="Total Reviews", ascending=False).reset_index(drop=True)
1773
 
1774
- print(f"Final DataFrame shape: {df.shape}")
1775
  print("="*60 + "\n")
1776
 
1777
  return df
@@ -1780,17 +592,17 @@ def get_leaderboard_dataframe():
1780
  def submit_agent(identifier, agent_name, developer, website):
1781
  """
1782
  Submit a new agent to the leaderboard.
1783
- Validates input, saves submission, and fetches PR metadata (memory-efficient).
1784
  """
1785
  # Validate required fields
1786
  if not identifier or not identifier.strip():
1787
- return " GitHub identifier is required", get_leaderboard_dataframe()
1788
  if not agent_name or not agent_name.strip():
1789
- return " Agent name is required", get_leaderboard_dataframe()
1790
  if not developer or not developer.strip():
1791
- return " Developer name is required", get_leaderboard_dataframe()
1792
  if not website or not website.strip():
1793
- return " Website URL is required", get_leaderboard_dataframe()
1794
 
1795
  # Clean inputs
1796
  identifier = identifier.strip()
@@ -1801,14 +613,14 @@ def submit_agent(identifier, agent_name, developer, website):
1801
  # Validate GitHub identifier
1802
  is_valid, message = validate_github_username(identifier)
1803
  if not is_valid:
1804
- return f" {message}", get_leaderboard_dataframe()
1805
 
1806
  # Check for duplicates by loading agents from HuggingFace
1807
  agents = load_agents_from_hf()
1808
  if agents:
1809
  existing_names = {agent['github_identifier'] for agent in agents}
1810
  if identifier in existing_names:
1811
- return f"⚠️ Agent with identifier '{identifier}' already exists", get_leaderboard_dataframe()
1812
 
1813
  # Create submission
1814
  submission = {
@@ -1816,62 +628,78 @@ def submit_agent(identifier, agent_name, developer, website):
1816
  'developer': developer,
1817
  'github_identifier': identifier,
1818
  'website': website,
 
1819
  }
1820
 
1821
  # Save to HuggingFace
1822
  if not save_agent_to_hf(submission):
1823
- return " Failed to save submission", get_leaderboard_dataframe()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1824
 
1825
- # Reconstruct and save leaderboard data with new agent
1826
  try:
1827
- print(f"📊 Reconstructing leaderboard with new agent...")
1828
- leaderboard_dict = construct_leaderboard_from_metadata()
1829
- monthly_metrics = calculate_monthly_metrics_by_agent()
1830
- save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
1831
- print(f" Leaderboard data updated")
 
 
1832
  except Exception as e:
1833
- print(f"⚠️ Failed to update leaderboard data: {str(e)}")
1834
 
1835
- # Return success message - data will be populated by daily incremental updates
1836
- return f"✅ Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe()
1837
 
1838
 
1839
  # =============================================================================
1840
  # GRADIO APPLICATION
1841
  # =============================================================================
1842
 
1843
- print(f"\n🚀 Starting SWE Agent PR Leaderboard")
1844
- print(f" Leaderboard time frame: {LEADERBOARD_TIME_FRAME_DAYS} days ({LEADERBOARD_TIME_FRAME_DAYS // 30} months)")
1845
- print(f" Mining update frequency: Every {UPDATE_TIME_FRAME_DAYS} days\n")
1846
 
1847
- # Start APScheduler for monthly PR mining at 12:00 AM UTC every 1st of the month
1848
  scheduler = BackgroundScheduler(timezone="UTC")
1849
  scheduler.add_job(
1850
- mine_all_agents,
1851
- trigger=CronTrigger(day=1, hour=0, minute=0), # 12:00 AM UTC every 1st of the month
1852
- id='monthly_review_mining',
1853
- name='Monthly Review Mining',
1854
  replace_existing=True
1855
  )
1856
  scheduler.start()
1857
  print(f"\n{'='*80}")
1858
- print(f"Scheduler initialized successfully")
1859
- print(f"⛏️ Mining schedule: Every 1st of the month at 12:00 AM UTC")
1860
- print(f"📥 On startup: Only loads cached data from HuggingFace (no mining)")
1861
  print(f"{'='*80}\n")
1862
 
1863
  # Create Gradio interface
1864
  with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
1865
- total_months = LEADERBOARD_TIME_FRAME_DAYS // 30
1866
-
1867
- gr.Markdown("# 🏆 SWE Agent Review Leaderboard")
1868
  gr.Markdown(f"Track and compare GitHub PR review acceptance statistics for SWE agents")
1869
-
1870
  with gr.Tabs():
1871
 
1872
  # Leaderboard Tab
1873
- with gr.Tab("📊 Leaderboard"):
1874
- gr.Markdown(f"*All statistics are based on reviews from the last {total_months} months*")
1875
  leaderboard_table = Leaderboard(
1876
  value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
1877
  datatype=LEADERBOARD_COLUMNS,
@@ -1897,7 +725,7 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
1897
 
1898
  # Monthly Metrics Section
1899
  gr.Markdown("---") # Divider
1900
- gr.Markdown("### 📈 Monthly Performance - Top 5 Agents")
1901
  gr.Markdown("*Shows acceptance rate trends and review volumes for the most active agents*")
1902
 
1903
  monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
@@ -1911,32 +739,32 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
1911
 
1912
 
1913
  # Submit Agent Tab
1914
- with gr.Tab("Submit Agent"):
1915
-
1916
  gr.Markdown("### Submit Your Agent")
1917
- gr.Markdown("Fill in the details below to add your agent to the leaderboard. Make sure you're logged in to HuggingFace CLI on your machine.")
1918
-
1919
  with gr.Row():
1920
  with gr.Column():
1921
  github_input = gr.Textbox(
1922
  label="GitHub Identifier*",
1923
- placeholder="Your agent username (e.g., my-agent-bot)"
1924
  )
1925
  name_input = gr.Textbox(
1926
  label="Agent Name*",
1927
  placeholder="Your agent's display name"
1928
  )
1929
-
1930
  with gr.Column():
1931
  developer_input = gr.Textbox(
1932
  label="Developer*",
1933
  placeholder="Your developer or team name"
1934
  )
1935
  website_input = gr.Textbox(
1936
- label="Website",
1937
  placeholder="https://your-agent-website.com"
1938
  )
1939
-
1940
  submit_button = gr.Button(
1941
  "Submit Agent",
1942
  variant="primary"
@@ -1945,7 +773,7 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
1945
  label="Submission Status",
1946
  interactive=False
1947
  )
1948
-
1949
  # Event handler
1950
  submit_button.click(
1951
  fn=submit_agent,
@@ -1956,4 +784,4 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
1956
 
1957
  # Launch application
1958
  if __name__ == "__main__":
1959
- app.launch()
 
3
  import json
4
  import os
5
  import time
 
6
  import requests
7
+ from datetime import datetime, timezone
 
8
  from huggingface_hub import HfApi, hf_hub_download
9
  from huggingface_hub.errors import HfHubHTTPError
 
10
  import backoff
11
  from dotenv import load_dotenv
12
  import pandas as pd
 
15
  from plotly.subplots import make_subplots
16
  from apscheduler.schedulers.background import BackgroundScheduler
17
  from apscheduler.triggers.cron import CronTrigger
 
18
 
19
  # Load environment variables
20
  load_dotenv()
 
24
  # =============================================================================
25
 
26
  AGENTS_REPO = "SWE-Arena/bot_metadata" # HuggingFace dataset for agent metadata
 
27
  LEADERBOARD_REPO = "SWE-Arena/leaderboard_metadata" # HuggingFace dataset for leaderboard data
 
 
28
 
29
  LEADERBOARD_COLUMNS = [
30
  ("Agent Name", "string"),
 
34
  ("Acceptance Rate (%)", "number"),
35
  ]
36
 
 
37
  # =============================================================================
38
  # HUGGINGFACE API WRAPPERS WITH BACKOFF
39
  # =============================================================================
 
53
  max_value=3600,
54
  giveup=lambda e: not is_rate_limit_error(e),
55
  on_backoff=lambda details: print(
56
+ f"Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
57
  )
58
  )
59
  def list_repo_files_with_backoff(api, **kwargs):
 
69
  max_value=3600,
70
  giveup=lambda e: not is_rate_limit_error(e),
71
  on_backoff=lambda details: print(
72
+ f"Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
73
  )
74
  )
75
  def hf_hub_download_with_backoff(**kwargs):
 
77
  return hf_hub_download(**kwargs)
78
 
79
 
 
 
 
 
 
 
80
  # =============================================================================
81
  # GITHUB API OPERATIONS
82
  # =============================================================================
 
171
  except Exception as e:
172
  return False, f"Validation error: {str(e)}"
173
 
 
 
 
 
 
 
 
174
 
175
  # =============================================================================
176
  # HUGGINGFACE DATASET OPERATIONS
 
216
  print(f"Warning: Could not load {json_file}: {str(e)}")
217
  continue
218
 
219
+ print(f"Loaded {len(agents)} agents from HuggingFace")
220
  return agents
221
 
222
  except Exception as e:
 
224
  return None
225
 
226
 
 
 
227
  def get_hf_token():
228
  """Get HuggingFace token from environment variables."""
229
  token = os.getenv('HF_TOKEN')
 
260
  token=token
261
  )
262
  if attempt > 0:
263
+ print(f" Upload succeeded on attempt {attempt + 1}/{max_retries}")
264
  return True
265
 
266
  except Exception as e:
267
  if attempt < max_retries - 1:
268
  wait_time = delay + random.uniform(0, 1.0)
269
+ print(f" Upload failed (attempt {attempt + 1}/{max_retries}): {str(e)}")
270
+ print(f" Retrying in {wait_time:.1f} seconds...")
271
  time.sleep(wait_time)
272
  delay = min(delay * 2, 60.0) # Exponential backoff, max 60s
273
  else:
274
+ print(f" Upload failed after {max_retries} attempts: {str(e)}")
275
  raise
276
 
277
 
 
301
  repo_type="dataset",
302
  token=token
303
  )
304
+ print(f"Saved agent to HuggingFace: {filename}")
 
 
 
 
 
 
 
 
305
  return True
306
  finally:
307
  # Always clean up local file, even if upload fails
 
309
  os.remove(filename)
310
 
311
  except Exception as e:
312
+ print(f"Error saving agent: {str(e)}")
 
 
313
  return False
314
 
315
 
 
338
  data = json.load(f)
339
 
340
  last_updated = data.get('last_updated', 'Unknown')
341
+ print(f"Loaded leaderboard data from HuggingFace (last updated: {last_updated})")
342
 
343
  return data
344
 
345
  except Exception as e:
346
+ print(f"Could not load leaderboard data from HuggingFace: {str(e)}")
347
  return None
348
 
349
 
 
 
 
 
 
 
 
350
  # =============================================================================
351
  # UI FUNCTIONS
352
  # =============================================================================
 
362
  Args:
363
  top_n: Number of top agents to show (default: 5)
364
  """
365
+ # Load from saved dataset
366
  saved_data = load_leaderboard_data_from_hf()
367
 
368
+ if not saved_data or 'monthly_metrics' not in saved_data:
369
+ # Return an empty figure with a message
370
+ fig = go.Figure()
371
+ fig.add_annotation(
372
+ text="No data available for visualization",
373
+ xref="paper", yref="paper",
374
+ x=0.5, y=0.5, showarrow=False,
375
+ font=dict(size=16)
376
+ )
377
+ fig.update_layout(
378
+ title=None,
379
+ xaxis_title=None,
380
+ height=500
381
+ )
382
+ return fig
383
+
384
+ metrics = saved_data['monthly_metrics']
385
+ print(f"Loaded monthly metrics from saved dataset")
386
+
387
+ # Apply top_n filter if specified
388
+ if top_n is not None and top_n > 0 and metrics.get('agents'):
389
+ # Calculate total reviews for each agent
390
+ agent_totals = []
391
+ for agent_name in metrics['agents']:
392
+ agent_data = metrics['data'].get(agent_name, {})
393
+ total_reviews = sum(agent_data.get('total_reviews', []))
394
+ agent_totals.append((agent_name, total_reviews))
395
+
396
+ # Sort by total reviews and take top N
397
+ agent_totals.sort(key=lambda x: x[1], reverse=True)
398
+ top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
399
+
400
+ # Filter metrics to only include top agents
401
+ metrics = {
402
+ 'agents': top_agents,
403
+ 'months': metrics['months'],
404
+ 'data': {agent: metrics['data'][agent] for agent in top_agents if agent in metrics['data']}
405
+ }
406
 
407
  if not metrics['agents'] or not metrics['months']:
408
  # Return an empty figure with a message
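A minimal sketch of the top-N selection added in the hunk above, using hypothetical sample data shaped like the saved monthly_metrics payload (agents, months, and per-agent total_reviews lists); only the ranking logic is exercised here, not the plotting:

# Hypothetical sample in the same shape as saved_data['monthly_metrics'].
sample_metrics = {
    'agents': ['bot-a', 'bot-b', 'bot-c'],
    'months': ['2025-01', '2025-02'],
    'data': {
        'bot-a': {'total_reviews': [10, 12]},
        'bot-b': {'total_reviews': [3, 1]},
        'bot-c': {'total_reviews': [7, 9]},
    },
}

def top_n_agents(metrics, top_n=2):
    # Same ranking rule as above: sum each agent's monthly review counts, keep the largest.
    totals = [(name, sum(metrics['data'].get(name, {}).get('total_reviews', [])))
              for name in metrics['agents']]
    totals.sort(key=lambda x: x[1], reverse=True)
    return [name for name, _ in totals[:top_n]]

print(top_n_agents(sample_metrics))  # ['bot-a', 'bot-c']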
 
525
  def get_leaderboard_dataframe():
526
  """
527
  Load leaderboard from saved dataset and convert to pandas DataFrame for display.
 
528
  Returns formatted DataFrame sorted by total reviews.
529
  """
530
+ # Load from saved dataset
531
  saved_data = load_leaderboard_data_from_hf()
532
 
533
+ if not saved_data or 'leaderboard' not in saved_data:
534
+ print(f"No leaderboard data available")
535
+ # Return empty DataFrame with correct columns if no data
536
+ column_names = [col[0] for col in LEADERBOARD_COLUMNS]
537
+ return pd.DataFrame(columns=column_names)
 
 
538
 
539
+ cache_dict = saved_data['leaderboard']
540
+ print(f"Loaded leaderboard from saved dataset (last updated: {saved_data.get('last_updated', 'Unknown')})")
541
+ print(f"Cache dict size: {len(cache_dict)}")
542
 
543
  if not cache_dict:
544
+ print("WARNING: cache_dict is empty!")
545
  # Return empty DataFrame with correct columns if no data
546
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
547
  return pd.DataFrame(columns=column_names)
 
566
  data.get('acceptance_rate', 0.0),
567
  ])
568
 
569
+ print(f"Filtered out {filtered_count} agents with 0 reviews")
570
+ print(f"Leaderboard will show {len(rows)} agents")
571
 
572
  # Create DataFrame
573
  column_names = [col[0] for col in LEADERBOARD_COLUMNS]
 
583
  if "Total Reviews" in df.columns and not df.empty:
584
  df = df.sort_values(by="Total Reviews", ascending=False).reset_index(drop=True)
585
 
586
+ print(f"Final DataFrame shape: {df.shape}")
587
  print("="*60 + "\n")
588
 
589
  return df
 
592
  def submit_agent(identifier, agent_name, developer, website):
593
  """
594
  Submit a new agent to the leaderboard.
595
+ Validates input and saves submission.
596
  """
597
  # Validate required fields
598
  if not identifier or not identifier.strip():
599
+ return "ERROR: GitHub identifier is required", get_leaderboard_dataframe()
600
  if not agent_name or not agent_name.strip():
601
+ return "ERROR: Agent name is required", get_leaderboard_dataframe()
602
  if not developer or not developer.strip():
603
+ return "ERROR: Developer name is required", get_leaderboard_dataframe()
604
  if not website or not website.strip():
605
+ return "ERROR: Website URL is required", get_leaderboard_dataframe()
606
 
607
  # Clean inputs
608
  identifier = identifier.strip()
 
613
  # Validate GitHub identifier
614
  is_valid, message = validate_github_username(identifier)
615
  if not is_valid:
616
+ return f"ERROR: {message}", get_leaderboard_dataframe()
617
 
618
  # Check for duplicates by loading agents from HuggingFace
619
  agents = load_agents_from_hf()
620
  if agents:
621
  existing_names = {agent['github_identifier'] for agent in agents}
622
  if identifier in existing_names:
623
+ return f"WARNING: Agent with identifier '{identifier}' already exists", get_leaderboard_dataframe()
624
 
625
  # Create submission
626
  submission = {
 
628
  'developer': developer,
629
  'github_identifier': identifier,
630
  'website': website,
631
+ 'status': 'public'
632
  }
633
 
634
  # Save to HuggingFace
635
  if not save_agent_to_hf(submission):
636
+ return "ERROR: Failed to save submission", get_leaderboard_dataframe()
637
+
638
+ # Return success message - data will be populated by backend updates
639
+ return f"SUCCESS: Successfully submitted {agent_name}! Review data will be populated by the backend system.", get_leaderboard_dataframe()
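A hedged usage sketch of the submission path defined above; the identifier, developer, and website below are placeholders, and the call performs live GitHub validation plus a HuggingFace upload, so it only succeeds with valid credentials:

# Hypothetical call; replace the arguments with your agent's real details.
status, leaderboard_df = submit_agent(
    identifier="claude[bot]",
    agent_name="Example Agent",
    developer="Example Team",
    website="https://your-agent-website.com",
)
print(status)                # "SUCCESS: ...", "ERROR: ...", or "WARNING: ..."
print(leaderboard_df.shape)  # current leaderboard table returned for the UI refresh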
640
+
641
+
642
+ # =============================================================================
643
+ # DATA RELOAD FUNCTION
644
+ # =============================================================================
645
+
646
+ def reload_leaderboard_data():
647
+ """
648
+ Reload leaderboard data from HuggingFace.
649
+ This function is called by the scheduler on a daily basis.
650
+ """
651
+ print(f"\n{'='*80}")
652
+ print(f"Reloading leaderboard data from HuggingFace...")
653
+ print(f"{'='*80}\n")
654
 
 
655
  try:
656
+ data = load_leaderboard_data_from_hf()
657
+ if data:
658
+ print(f"Successfully reloaded leaderboard data")
659
+ print(f" Last updated: {data.get('last_updated', 'Unknown')}")
660
+ print(f" Agents: {len(data.get('leaderboard', {}))}")
661
+ else:
662
+ print(f"No data available")
663
  except Exception as e:
664
+ print(f"Error reloading leaderboard data: {str(e)}")
665
 
666
+ print(f"{'='*80}\n")
 
667
 
668
 
669
  # =============================================================================
670
  # GRADIO APPLICATION
671
  # =============================================================================
672
 
673
+ print(f"\nStarting SWE Agent PR Leaderboard")
674
+ print(f" Data source: {LEADERBOARD_REPO}")
675
+ print(f" Reload frequency: Daily at 12:00 AM UTC\n")
676
 
677
+ # Start APScheduler for daily data reload at 12:00 AM UTC
678
  scheduler = BackgroundScheduler(timezone="UTC")
679
  scheduler.add_job(
680
+ reload_leaderboard_data,
681
+ trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
682
+ id='daily_data_reload',
683
+ name='Daily Data Reload',
684
  replace_existing=True
685
  )
686
  scheduler.start()
687
  print(f"\n{'='*80}")
688
+ print(f"Scheduler initialized successfully")
689
+ print(f"Reload schedule: Daily at 12:00 AM UTC")
690
+ print(f"On startup: Loads cached data from HuggingFace on demand")
691
  print(f"{'='*80}\n")
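An optional sanity check, assuming APScheduler 3.x, that can be run right after scheduler.start() to confirm the daily reload job configured above was registered:

# List registered jobs and when they will fire next (UTC).
for job in scheduler.get_jobs():
    print(f"Job '{job.id}' next run at {job.next_run_time}")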
692
 
693
  # Create Gradio interface
694
  with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
695
+ gr.Markdown("# SWE Agent Review Leaderboard")
 
 
696
  gr.Markdown(f"Track and compare GitHub PR review acceptance statistics for SWE agents")
697
+
698
  with gr.Tabs():
699
 
700
  # Leaderboard Tab
701
+ with gr.Tab("Leaderboard"):
702
+ gr.Markdown("*Statistics are based on agent review activity tracked by the system*")
703
  leaderboard_table = Leaderboard(
704
  value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
705
  datatype=LEADERBOARD_COLUMNS,
 
725
 
726
  # Monthly Metrics Section
727
  gr.Markdown("---") # Divider
728
+ gr.Markdown("### Monthly Performance - Top 5 Agents")
729
  gr.Markdown("*Shows acceptance rate trends and review volumes for the most active agents*")
730
 
731
  monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
 
739
 
740
 
741
  # Submit Agent Tab
742
+ with gr.Tab("Submit Agent"):
743
+
744
  gr.Markdown("### Submit Your Agent")
745
+ gr.Markdown("Fill in the details below to add your agent to the leaderboard.")
746
+
747
  with gr.Row():
748
  with gr.Column():
749
  github_input = gr.Textbox(
750
  label="GitHub Identifier*",
751
+ placeholder="Your agent username (e.g., claude[bot])"
752
  )
753
  name_input = gr.Textbox(
754
  label="Agent Name*",
755
  placeholder="Your agent's display name"
756
  )
757
+
758
  with gr.Column():
759
  developer_input = gr.Textbox(
760
  label="Developer*",
761
  placeholder="Your developer or team name"
762
  )
763
  website_input = gr.Textbox(
764
+ label="Website*",
765
  placeholder="https://your-agent-website.com"
766
  )
767
+
768
  submit_button = gr.Button(
769
  "Submit Agent",
770
  variant="primary"
 
773
  label="Submission Status",
774
  interactive=False
775
  )
776
+
777
  # Event handler
778
  submit_button.click(
779
  fn=submit_agent,
 
784
 
785
  # Launch application
786
  if __name__ == "__main__":
787
+ app.launch()
msr.py CHANGED
@@ -1,18 +1,25 @@
1
  """
2
  Minimalist Review Metadata Mining Script
3
- Mines PR review metadata from GitHub Archive via BigQuery and saves to HuggingFace dataset.
 
 
 
 
 
4
  """
5
 
6
  import json
7
  import os
 
8
  import tempfile
9
  from datetime import datetime, timezone, timedelta
10
  from collections import defaultdict
11
  from huggingface_hub import HfApi, hf_hub_download
12
  from huggingface_hub.errors import HfHubHTTPError
13
  from dotenv import load_dotenv
14
- from google.cloud import bigquery
15
  import backoff
 
16
 
17
  # Load environment variables
18
  load_dotenv()
@@ -25,6 +32,13 @@ AGENTS_REPO = "SWE-Arena/bot_metadata"
25
  REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"
26
  LEADERBOARD_REPO = "SWE-Arena/leaderboard_metadata" # HuggingFace dataset for leaderboard data
27
  LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
 
 
 
 
 
 
 
28
 
29
  # =============================================================================
30
  # UTILITY FUNCTIONS
@@ -98,250 +112,173 @@ def get_hf_token():
98
 
99
 
100
  # =============================================================================
101
- # HUGGINGFACE API WRAPPERS WITH BACKOFF
102
  # =============================================================================
103
 
104
- def is_rate_limit_error(e):
105
- """Check if exception is a HuggingFace rate limit error (429)."""
 
 
 
106
  if isinstance(e, HfHubHTTPError):
107
- return e.response.status_code == 429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  return False
109
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  @backoff.on_exception(
112
  backoff.expo,
113
- HfHubHTTPError,
114
  max_tries=8,
115
  base=300,
116
  max_value=3600,
117
- giveup=lambda e: not is_rate_limit_error(e),
118
  on_backoff=lambda details: print(
119
- f" Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
120
  )
121
  )
122
  def list_repo_files_with_backoff(api, **kwargs):
123
- """Wrapper for api.list_repo_files() with exponential backoff for rate limits."""
124
  return api.list_repo_files(**kwargs)
125
 
126
 
127
  @backoff.on_exception(
128
  backoff.expo,
129
- HfHubHTTPError,
130
  max_tries=8,
131
  base=300,
132
  max_value=3600,
133
- giveup=lambda e: not is_rate_limit_error(e),
134
  on_backoff=lambda details: print(
135
- f" Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
136
  )
137
  )
138
  def hf_hub_download_with_backoff(**kwargs):
139
- """Wrapper for hf_hub_download() with exponential backoff for rate limits."""
140
  return hf_hub_download(**kwargs)
141
 
142
 
143
  @backoff.on_exception(
144
  backoff.expo,
145
- HfHubHTTPError,
146
  max_tries=8,
147
  base=300,
148
  max_value=3600,
149
- giveup=lambda e: not is_rate_limit_error(e),
150
  on_backoff=lambda details: print(
151
- f" Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
152
  )
153
  )
154
  def upload_file_with_backoff(api, **kwargs):
155
- """Wrapper for api.upload_file() with exponential backoff for rate limits."""
156
  return api.upload_file(**kwargs)
157
 
158
 
159
  @backoff.on_exception(
160
  backoff.expo,
161
- HfHubHTTPError,
162
  max_tries=8,
163
  base=300,
164
  max_value=3600,
165
- giveup=lambda e: not is_rate_limit_error(e),
166
  on_backoff=lambda details: print(
167
- f" Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
168
  )
169
  )
170
  def upload_folder_with_backoff(api, **kwargs):
171
- """Wrapper for api.upload_folder() with exponential backoff for rate limits."""
172
  return api.upload_folder(**kwargs)
173
 
174
 
175
- def get_bigquery_client():
176
  """
177
- Initialize BigQuery client using credentials from environment variable.
178
 
179
- Expects GOOGLE_APPLICATION_CREDENTIALS_JSON environment variable containing
180
- the service account JSON credentials as a string.
181
  """
182
- # Get the JSON content from environment variable
183
- creds_json = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON')
184
-
185
- if creds_json:
186
- # Create a temporary file to store credentials
187
- with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
188
- temp_file.write(creds_json)
189
- temp_path = temp_file.name
190
 
191
- # Set environment variable to point to temp file
192
- os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = temp_path
 
193
 
194
- # Initialize BigQuery client
195
- client = bigquery.Client()
196
 
197
- # Clean up temp file
198
- os.unlink(temp_path)
199
 
200
- return client
201
- else:
202
- raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
203
-
204
-
205
- def generate_table_union_statements(start_date, end_date):
206
  """
207
- Generate UNION ALL statements for githubarchive.month tables in date range.
208
- Uses monthly tables instead of daily to drastically reduce query size.
209
 
210
  Args:
211
  start_date: Start datetime
212
  end_date: End datetime
 
213
 
214
  Returns:
215
- String with UNION ALL SELECT statements for all monthly tables in range
216
  """
217
- table_names = []
218
-
219
- # Start from the beginning of start_date's month
220
- current_date = start_date.replace(day=1)
221
 
222
- # End at the beginning of end_date's month (inclusive)
223
- end_month = end_date.replace(day=1)
224
 
225
- while current_date <= end_month:
226
- table_name = f"`githubarchive.month.{current_date.strftime('%Y%m')}`"
227
- table_names.append(table_name)
 
228
 
229
- # Move to next month
230
- if current_date.month == 12:
231
- current_date = current_date.replace(year=current_date.year + 1, month=1)
232
- else:
233
- current_date = current_date.replace(month=current_date.month + 1)
234
 
235
- # Create UNION ALL chain
236
- union_parts = [f"SELECT * FROM {table}" for table in table_names]
237
- return " UNION ALL ".join(union_parts)
238
 
239
 
240
  # =============================================================================
241
- # BIGQUERY FUNCTIONS
242
  # =============================================================================
243
 
244
- def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=100, upload_immediately=True):
245
- """
246
- Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
247
- Splits agents into smaller batches to avoid performance issues with large queries.
248
-
249
- Args:
250
- client: BigQuery client instance
251
- identifiers: List of GitHub usernames/bot identifiers
252
- start_date: Start datetime (timezone-aware)
253
- end_date: End datetime (timezone-aware)
254
- batch_size: Number of agents to process per batch (default: 100)
255
- upload_immediately: If True, upload each batch to HuggingFace immediately after processing (default: True)
256
-
257
- Returns:
258
- Dictionary mapping agent identifier to list of PR metadata (same format as single query)
259
  """
260
- print(f"\n🔍 Using BATCHED approach: {len(identifiers)} agents in batches of {batch_size}")
261
-
262
- # Log upload mode
263
- if upload_immediately:
264
- print(f" 📤 Upload mode: IMMEDIATE (upload after each batch)")
265
- else:
266
- print(f" 📤 Upload mode: DEFERRED (upload after all batches complete)")
267
 
268
- # Split identifiers into batches
269
- batches = [identifiers[i:i + batch_size] for i in range(0, len(identifiers), batch_size)]
270
- total_batches = len(batches)
271
-
272
- print(f" Total batches: {total_batches}")
273
-
274
- # Collect results from all batches
275
- all_metadata = {}
276
- successful_batches = 0
277
- failed_batches = 0
278
-
279
- for batch_num, batch_identifiers in enumerate(batches, 1):
280
- print(f"\n📦 Processing batch {batch_num}/{total_batches} ({len(batch_identifiers)} agents)...")
281
-
282
- try:
283
- # Query this batch
284
- batch_results = fetch_all_pr_metadata_single_query(
285
- client, batch_identifiers, start_date, end_date
286
- )
287
-
288
- # Merge results
289
- for identifier, metadata_list in batch_results.items():
290
- if identifier in all_metadata:
291
- all_metadata[identifier].extend(metadata_list)
292
- else:
293
- all_metadata[identifier] = metadata_list
294
-
295
- successful_batches += 1
296
- print(f" ✓ Batch {batch_num}/{total_batches} complete: {len(batch_results)} agents processed")
297
-
298
- # Upload immediately after this batch if enabled
299
- if upload_immediately and batch_results:
300
- print(f"\n 📤 Uploading batch {batch_num}/{total_batches} results to HuggingFace...")
301
- upload_success = 0
302
- upload_errors = 0
303
-
304
- for identifier, metadata_list in batch_results.items():
305
- if metadata_list:
306
- if save_review_metadata_to_hf(metadata_list, identifier):
307
- upload_success += 1
308
- else:
309
- upload_errors += 1
310
-
311
- print(f" ✓ Batch {batch_num}/{total_batches} upload complete ({upload_success} agents uploaded, {upload_errors} errors)")
312
-
313
- except Exception as e:
314
- failed_batches += 1
315
- print(f" ✗ Batch {batch_num}/{total_batches} failed: {str(e)}")
316
- print(f" Continuing with remaining batches...")
317
- continue
318
-
319
- print(f"\n📊 Batching Summary:")
320
- print(f" Total batches: {total_batches}")
321
- print(f" Successful: {successful_batches}")
322
- print(f" Failed: {failed_batches}")
323
- print(f" Total agents with data: {len(all_metadata)}")
324
-
325
- return all_metadata
326
-
327
-
328
- def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date):
329
- """
330
- Fetch PR review metadata for a BATCH of agents using ONE comprehensive BigQuery query.
331
-
332
- NOTE: This function is designed for smaller batches (~100 agents).
333
- For large numbers of agents, use fetch_all_pr_metadata_batched() instead.
334
-
335
  This query combines:
336
  1. Review events (PullRequestReviewEvent) for all agents
337
  2. PR status (PullRequestEvent with action='closed')
338
-
339
  Args:
340
- client: BigQuery client instance
341
  identifiers: List of GitHub usernames/bot identifiers
342
  start_date: Start datetime (timezone-aware)
343
  end_date: End datetime (timezone-aware)
344
-
345
  Returns:
346
  Dictionary mapping agent identifier to list of PR metadata:
347
  {
@@ -357,97 +294,89 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
357
  ...
358
  }
359
  """
360
- print(f"\n🔍 Querying BigQuery for ALL {len(identifiers)} agents in ONE QUERY")
361
  print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
362
-
363
- # Generate table UNION statements for review period
364
- review_tables = generate_table_union_statements(start_date, end_date)
365
-
366
- # Generate table UNION statements for PR status (use same lookback as reviews)
367
  status_start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
368
- status_tables = generate_table_union_statements(status_start_date, end_date)
369
-
370
  # Build identifier list for IN clause
371
  identifier_list = ', '.join([f"'{id}'" for id in identifiers])
372
-
373
- # Build comprehensive query with CTEs
374
  query = f"""
375
  WITH review_events AS (
376
  -- Get all review events for ALL agents
377
  SELECT
378
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
379
  COALESCE(
380
- JSON_EXTRACT_SCALAR(payload, '$.review.submitted_at'),
381
- CAST(created_at AS STRING)
382
  ) as reviewed_at,
383
- actor.login as reviewer,
384
- repo.name as repo_name,
385
- CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number
386
- FROM (
387
- {review_tables}
388
- )
389
  WHERE
390
  type = 'PullRequestReviewEvent'
391
- AND actor.login IN ({identifier_list})
392
- AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IS NOT NULL
393
 
394
  UNION ALL
395
 
396
  -- Get PR comments (IssueCommentEvent on PRs)
397
  SELECT
398
- JSON_EXTRACT_SCALAR(payload, '$.issue.html_url') as url,
399
- CAST(created_at AS STRING) as reviewed_at,
400
- actor.login as reviewer,
401
- repo.name as repo_name,
402
- CAST(JSON_EXTRACT_SCALAR(payload, '$.issue.number') AS INT64) as pr_number
403
- FROM (
404
- {review_tables}
405
- )
406
  WHERE
407
  type = 'IssueCommentEvent'
408
- AND actor.login IN ({identifier_list})
409
- AND JSON_EXTRACT_SCALAR(payload, '$.issue.pull_request.url') IS NOT NULL
410
- AND JSON_EXTRACT_SCALAR(payload, '$.issue.html_url') IS NOT NULL
411
 
412
  UNION ALL
413
 
414
  -- Get review comments (PullRequestReviewCommentEvent)
415
  SELECT
416
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
417
- CAST(created_at AS STRING) as reviewed_at,
418
- actor.login as reviewer,
419
- repo.name as repo_name,
420
- CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number
421
- FROM (
422
- {review_tables}
423
- )
424
  WHERE
425
  type = 'PullRequestReviewCommentEvent'
426
- AND actor.login IN ({identifier_list})
427
- AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IS NOT NULL
428
  ),
429
-
430
  pr_status AS (
431
  -- Get merge/close status for those PRs
432
  SELECT
433
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
434
- CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) as is_merged,
435
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged_at') as merged_at,
436
- JSON_EXTRACT_SCALAR(payload, '$.pull_request.closed_at') as closed_at,
437
- created_at
438
- FROM (
439
- {status_tables}
440
- )
441
  WHERE
442
  type = 'PullRequestEvent'
443
- AND JSON_EXTRACT_SCALAR(payload, '$.action') = 'closed'
444
- AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IS NOT NULL
445
- AND JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') IN (
446
  SELECT DISTINCT url FROM review_events
447
  )
448
- QUALIFY ROW_NUMBER() OVER (PARTITION BY url ORDER BY created_at DESC) = 1
449
  )
450
-
451
  -- Join review events with PR status
452
  SELECT DISTINCT
453
  re.reviewer,
@@ -456,54 +385,42 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
456
  ps.merged_at,
457
  ps.closed_at
458
  FROM review_events re
459
- LEFT JOIN pr_status ps ON re.url = ps.url
460
  ORDER BY re.reviewer, re.reviewed_at DESC
461
  """
462
-
463
  # Calculate number of days for reporting
464
  review_days = (end_date - start_date).days
465
  status_days = (end_date - status_start_date).days
466
-
467
  print(f" Querying {review_days} days for reviews, {status_days} days for PR status...")
468
  print(f" Agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}")
469
-
470
  try:
471
- query_job = client.query(query)
472
- results = list(query_job.result())
473
-
474
- print(f" ✓ Found {len(results)} total PR review records across all agents")
475
-
476
- # Group results by agent
477
- metadata_by_agent = defaultdict(list)
478
-
479
- for row in results:
480
- reviewer = row.reviewer
481
 
482
- # Convert datetime objects to ISO strings and normalize
483
- reviewed_at = row.reviewed_at
484
- if hasattr(reviewed_at, 'isoformat'):
485
- reviewed_at = reviewed_at.isoformat()
486
- reviewed_at = normalize_date_format(reviewed_at) if reviewed_at else None
487
 
488
- merged_at = row.merged_at
489
- if hasattr(merged_at, 'isoformat'):
490
- merged_at = merged_at.isoformat()
491
- merged_at = normalize_date_format(merged_at) if merged_at else None
492
 
493
- closed_at = row.closed_at
494
- if hasattr(closed_at, 'isoformat'):
495
- closed_at = closed_at.isoformat()
496
- closed_at = normalize_date_format(closed_at) if closed_at else None
 
 
497
 
498
  metadata_by_agent[reviewer].append({
499
- 'url': row.url,
500
  'reviewed_at': reviewed_at,
501
  'merged_at': merged_at,
502
  'closed_at': closed_at,
503
  })
504
-
505
  # Print breakdown by agent
506
- print(f"\n 📊 Results breakdown by agent:")
507
  for identifier in identifiers:
508
  count = len(metadata_by_agent.get(identifier, []))
509
  if count > 0:
@@ -512,19 +429,19 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
512
  closed_count = sum(1 for m in metadata if m['closed_at'] is not None and m['merged_at'] is None)
513
  open_count = count - merged_count - closed_count
514
  print(f" {identifier}: {count} PRs ({merged_count} merged, {closed_count} closed, {open_count} open)")
515
-
516
  # Convert defaultdict to regular dict
517
  return dict(metadata_by_agent)
518
-
519
  except Exception as e:
520
- print(f" BigQuery error: {str(e)}")
521
  import traceback
522
  traceback.print_exc()
523
  return {}
524
 
525
 
526
  # =============================================================================
527
- # HUGGINGFACE STORAGE FUNCTIONS
528
  # =============================================================================
529
 
530
  def group_metadata_by_date(metadata_list):
@@ -549,20 +466,57 @@ def group_metadata_by_date(metadata_list):
549
  return dict(grouped)
550
 
551
 
552
- def save_review_metadata_to_hf(metadata_list, agent_identifier):
553
  """
554
- Save review metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl.
555
- Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's reviews.
556
-
557
- This function OVERWRITES existing files completely with fresh data from BigQuery.
558
- Uses batch upload to avoid rate limit (uploads entire folder in single commit).
559
 
560
  Args:
561
- metadata_list: List of review metadata dictionaries
562
- agent_identifier: GitHub identifier of the agent (used as folder name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  """
564
- import shutil
 
565
 
 
 
 
 
 
 
566
  try:
567
  token = get_hf_token()
568
  if not token:
@@ -570,56 +524,103 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
570
 
571
  api = HfApi(token=token)
572
 
573
- # Group by date (year, month, day)
574
- grouped = group_metadata_by_date(metadata_list)
 
575
 
576
- if not grouped:
577
- print(f" No valid metadata to save for {agent_identifier}")
578
- return False
 
 
579
 
580
- # Create a temporary directory for batch upload
581
- temp_dir = tempfile.mkdtemp()
582
- agent_folder = os.path.join(temp_dir, agent_identifier)
583
- os.makedirs(agent_folder, exist_ok=True)
584
 
585
- try:
586
- print(f" 📦 Preparing batch upload for {len(grouped)} daily files...")
587
 
588
- # Process each daily file
589
- for (review_year, month, day), day_metadata in grouped.items():
590
- filename = f"{agent_identifier}/{review_year}.{month:02d}.{day:02d}.jsonl"
591
- local_filename = os.path.join(agent_folder, f"{review_year}.{month:02d}.{day:02d}.jsonl")
592
 
593
- # Sort by reviewed_at for better organization
594
- day_metadata.sort(key=lambda x: x.get('reviewed_at', ''), reverse=True)
595
 
596
- # Save to temp directory (complete overwrite, no merging)
597
- save_jsonl(local_filename, day_metadata)
598
- print(f" Prepared {len(day_metadata)} reviews for {filename}")
599
 
600
- # Upload entire folder using upload_folder (single commit per agent)
601
- print(f" 📤 Uploading {len(grouped)} files ({len(metadata_list)} total reviews)...")
602
- upload_folder_with_backoff(
603
- api=api,
604
- folder_path=temp_dir,
605
- repo_id=REVIEW_METADATA_REPO,
606
- repo_type="dataset",
607
- commit_message=f"Update review metadata for {agent_identifier}"
608
- )
609
- print(f" ✓ Batch upload complete for {agent_identifier}")
610
 
611
- return True
 
 
 
 
 
 
 
 
 
 
 
 
612
 
613
- finally:
614
- # Always clean up temp directory
615
- if os.path.exists(temp_dir):
616
- shutil.rmtree(temp_dir)
 
 
 
 
617
 
618
  except Exception as e:
619
- print(f"Error saving review metadata: {str(e)}")
620
  import traceback
621
  traceback.print_exc()
622
- return False
623
 
624
 
625
  def load_agents_from_hf():
@@ -666,7 +667,7 @@ def load_agents_from_hf():
666
  print(f"Warning: Could not load {json_file}: {str(e)}")
667
  continue
668
 
669
- print(f"Loaded {len(agents)} agents from HuggingFace")
670
  return agents
671
 
672
  except Exception as e:
@@ -713,7 +714,7 @@ def load_review_metadata():
713
  except Exception:
714
  continue
715
 
716
- print(f"📥 Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files)...")
717
 
718
  all_metadata = []
719
 
@@ -742,11 +743,11 @@ def load_review_metadata():
742
  except Exception as e:
743
  print(f" Warning: Could not load {filename}: {str(e)}")
744
 
745
- print(f"Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")
746
  return all_metadata
747
 
748
  except Exception as e:
749
- print(f"Error loading review metadata: {str(e)}")
750
  return []
751
 
752
 
@@ -908,19 +909,19 @@ def construct_leaderboard_from_metadata():
908
  Returns:
909
  Dictionary of agent stats.
910
  """
911
- print("\n📊 Constructing leaderboard from review metadata...")
912
 
913
  # Load agents
914
  agents = load_agents_from_hf()
915
  if not agents:
916
- print("⚠️ No agents found")
917
  return {}
918
 
919
- print(f"Loaded {len(agents)} agents")
920
 
921
  # Load all review metadata
922
  all_metadata = load_review_metadata()
923
- print(f"Loaded {len(all_metadata)} review metadata entries")
924
 
925
  cache_dict = {}
926
 
@@ -935,14 +936,13 @@ def construct_leaderboard_from_metadata():
935
  stats = calculate_review_stats_from_metadata(bot_metadata)
936
 
937
  cache_dict[identifier] = {
938
- 'name': agent_name,
939
  'name': agent_name,
940
  'website': agent.get('website', 'N/A'),
941
  'github_identifier': identifier,
942
  **stats
943
  }
944
 
945
- print(f"Constructed cache with {len(cache_dict)} agent entries")
946
 
947
  return cache_dict
948
 
@@ -981,7 +981,8 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
981
  json.dump(combined_data, f, indent=2)
982
 
983
  try:
984
- # Upload to HuggingFace
 
985
  upload_file_with_backoff(
986
  api=api,
987
  path_or_fileobj=filename,
@@ -989,7 +990,8 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
989
  repo_id=LEADERBOARD_REPO,
990
  repo_type="dataset"
991
  )
992
- print(f" Saved leaderboard data to HuggingFace: {filename}")
 
993
  return True
994
  finally:
995
  # Always clean up local file
@@ -997,7 +999,8 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
997
  os.remove(filename)
998
 
999
  except Exception as e:
1000
- print(f" Error saving leaderboard data: {str(e)}")
 
1001
  import traceback
1002
  traceback.print_exc()
1003
  return False
@@ -1010,43 +1013,42 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
1010
  def mine_all_agents():
1011
  """
1012
  Mine review metadata for all agents within LEADERBOARD_TIME_FRAME_DAYS and save to HuggingFace.
1013
- Uses ONE BigQuery query for ALL agents (most efficient approach).
1014
  """
1015
  # Load agent metadata from HuggingFace
1016
  agents = load_agents_from_hf()
1017
  if not agents:
1018
  print("No agents found in HuggingFace dataset")
1019
  return
1020
-
1021
  # Extract all identifiers
1022
  identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
1023
  if not identifiers:
1024
  print("No valid agent identifiers found")
1025
  return
1026
-
1027
- print(f"\n{'='*80}")
1028
  print(f"Starting review metadata mining for {len(identifiers)} agents")
1029
  print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
1030
- print(f"Data source: BigQuery + GitHub Archive (BATCHED QUERIES)")
1031
- print(f"{'='*80}\n")
1032
-
1033
- # Initialize BigQuery client
1034
  try:
1035
- client = get_bigquery_client()
1036
  except Exception as e:
1037
- print(f"Failed to initialize BigQuery client: {str(e)}")
1038
  return
1039
-
1040
  # Define time range: past LEADERBOARD_TIME_FRAME_DAYS (excluding today)
1041
  current_time = datetime.now(timezone.utc)
1042
  end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
1043
  start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
1044
-
1045
  try:
1046
- # Use batched approach for better performance
1047
- # upload_immediately=True means each batch uploads to HuggingFace right after BigQuery completes
1048
- all_metadata = fetch_all_pr_metadata_batched(
1049
- client, identifiers, start_date, end_date, batch_size=100, upload_immediately=True
1050
  )
1051
 
1052
  # Calculate summary statistics
@@ -1054,21 +1056,27 @@ def mine_all_agents():
1054
  agents_with_data = sum(1 for metadata_list in all_metadata.values() if metadata_list)
1055
 
1056
  print(f"\n{'='*80}")
1057
- print(f" BigQuery mining and upload complete!")
1058
  print(f" Total agents: {len(agents)}")
1059
  print(f" Agents with data: {agents_with_data}")
1060
  print(f" Total PRs found: {total_prs}")
1061
- print(f"{'='*80}\n")
1062
 
1063
  except Exception as e:
1064
- print(f"Error during BigQuery fetch: {str(e)}")
1065
  import traceback
1066
  traceback.print_exc()
1067
  return
 
 
 
 
 
 
1068
 
1069
  # Construct and save leaderboard data
1070
- print(f"\n{'='*80}")
1071
- print(f"📊 Constructing and saving leaderboard data...")
1072
  print(f"{'='*80}\n")
1073
 
1074
  try:
@@ -1076,22 +1084,23 @@ def mine_all_agents():
1076
  leaderboard_dict = construct_leaderboard_from_metadata()
1077
 
1078
  # Calculate monthly metrics
1079
- print(f"\n📈 Calculating monthly metrics...")
1080
  monthly_metrics = calculate_monthly_metrics_by_agent()
1081
 
1082
  # Save to HuggingFace
1083
- print(f"\n💾 Saving leaderboard data to HuggingFace...")
1084
  save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
1085
 
1086
  print(f"\n{'='*80}")
1087
- print(f" Leaderboard data saved successfully!")
 
1088
  print(f" Leaderboard entries: {len(leaderboard_dict)}")
1089
  print(f" Monthly data points: {len(monthly_metrics.get('months', []))} months")
1090
  print(f" Saved to: {LEADERBOARD_REPO}/swe-review.json")
1091
- print(f"{'='*80}\n")
1092
 
1093
  except Exception as e:
1094
- print(f"\n✗ Failed to construct/save leaderboard data: {str(e)}")
1095
  import traceback
1096
  traceback.print_exc()
1097
 
@@ -1101,4 +1110,4 @@ def mine_all_agents():
1101
  # =============================================================================
1102
 
1103
  if __name__ == "__main__":
1104
- mine_all_agents()
 
1
  """
2
  Minimalist Review Metadata Mining Script
3
+ Mines PR review metadata from local GHArchive data via DuckDB and saves to HuggingFace dataset.
4
+
5
+ Changes from previous version:
6
+ 1. Single SQL query for all agents (no batching)
7
+ 2. Batch upload with time gaps and comprehensive retry logic
8
+ 3. Handles both rate limit and timeout errors with exponential backoff
9
  """
10
 
11
  import json
12
  import os
13
+ import time
14
  import tempfile
15
  from datetime import datetime, timezone, timedelta
16
  from collections import defaultdict
17
  from huggingface_hub import HfApi, hf_hub_download
18
  from huggingface_hub.errors import HfHubHTTPError
19
  from dotenv import load_dotenv
20
+ import duckdb
21
  import backoff
22
+ import requests.exceptions
23
 
24
  # Load environment variables
25
  load_dotenv()
 
32
  REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"
33
  LEADERBOARD_REPO = "SWE-Arena/leaderboard_metadata" # HuggingFace dataset for leaderboard data
34
  LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
35
+ GHARCHIVE_DATA_DIR = "../gharchive/data" # Local GHArchive data directory
36
+
37
+ # Upload configuration
38
+ UPLOAD_DELAY_SECONDS = 2 # Delay between individual file uploads to avoid rate limits
39
+ MAX_RETRIES = 5 # Maximum number of retries for each upload
40
+ INITIAL_BACKOFF = 60 # Initial backoff time in seconds (1 minute)
41
+ MAX_BACKOFF = 3600 # Maximum backoff time in seconds (60 minutes)
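With these constants, the per-attempt wait computed later in upload_single_file_with_retry is min(INITIAL_BACKOFF * 2 ** attempt, MAX_BACKOFF); a quick sketch of the schedule they produce:

# Waits slept before retries 2..5: 60s, 120s, 240s, 480s (MAX_BACKOFF = 3600 is never reached here).
waits = [min(INITIAL_BACKOFF * (2 ** attempt), MAX_BACKOFF) for attempt in range(MAX_RETRIES - 1)]
print(waits)  # [60, 120, 240, 480]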
42
 
43
  # =============================================================================
44
  # UTILITY FUNCTIONS
 
112
 
113
 
114
  # =============================================================================
115
+ # HUGGINGFACE API WRAPPERS WITH ENHANCED BACKOFF
116
  # =============================================================================
117
 
118
+ def is_retryable_error(e):
119
+ """
120
+ Check if exception is retryable (rate limit or timeout error).
121
+ """
122
+ # Check for rate limit error (429)
123
  if isinstance(e, HfHubHTTPError):
124
+ if e.response.status_code == 429:
125
+ return True
126
+
127
+ # Check for timeout errors
128
+ if isinstance(e, (requests.exceptions.Timeout,
129
+ requests.exceptions.ReadTimeout,
130
+ requests.exceptions.ConnectTimeout)):
131
+ return True
132
+
133
+ # Check if it's a timeout error wrapped in HfHubHTTPError
134
+ if isinstance(e, Exception):
135
+ error_str = str(e).lower()
136
+ if 'timeout' in error_str or 'timed out' in error_str:
137
+ return True
138
+
139
  return False
140
 
141
 
142
+ def get_error_type(e):
143
+ """Get human-readable error type for logging."""
144
+ if isinstance(e, HfHubHTTPError):
145
+ if e.response.status_code == 429:
146
+ return "Rate limit"
147
+ if isinstance(e, (requests.exceptions.Timeout,
148
+ requests.exceptions.ReadTimeout,
149
+ requests.exceptions.ConnectTimeout)):
150
+ return "Timeout"
151
+ if 'timeout' in str(e).lower():
152
+ return "Timeout"
153
+ return "Unknown"
154
+
155
+
156
  @backoff.on_exception(
157
  backoff.expo,
158
+ (HfHubHTTPError, requests.exceptions.Timeout, requests.exceptions.RequestException, Exception),
159
  max_tries=8,
160
  base=300,
161
  max_value=3600,
162
+ giveup=lambda e: not is_retryable_error(e),
163
  on_backoff=lambda details: print(
164
+ f" {get_error_type(details['exception'])} error. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
165
  )
166
  )
167
  def list_repo_files_with_backoff(api, **kwargs):
168
+ """Wrapper for api.list_repo_files() with exponential backoff for retryable errors."""
169
  return api.list_repo_files(**kwargs)
170
 
171
 
172
  @backoff.on_exception(
173
  backoff.expo,
174
+ (HfHubHTTPError, requests.exceptions.Timeout, requests.exceptions.RequestException, Exception),
175
  max_tries=8,
176
  base=300,
177
  max_value=3600,
178
+ giveup=lambda e: not is_retryable_error(e),
179
  on_backoff=lambda details: print(
180
+ f" {get_error_type(details['exception'])} error. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
181
  )
182
  )
183
  def hf_hub_download_with_backoff(**kwargs):
184
+ """Wrapper for hf_hub_download() with exponential backoff for retryable errors."""
185
  return hf_hub_download(**kwargs)
186
 
187
 
188
  @backoff.on_exception(
189
  backoff.expo,
190
+ (HfHubHTTPError, requests.exceptions.Timeout, requests.exceptions.RequestException, Exception),
191
  max_tries=8,
192
  base=300,
193
  max_value=3600,
194
+ giveup=lambda e: not is_retryable_error(e),
195
  on_backoff=lambda details: print(
196
+ f" {get_error_type(details['exception'])} error. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
197
  )
198
  )
199
  def upload_file_with_backoff(api, **kwargs):
200
+ """Wrapper for api.upload_file() with exponential backoff for retryable errors."""
201
  return api.upload_file(**kwargs)
202
 
203
 
204
  @backoff.on_exception(
205
  backoff.expo,
206
+ (HfHubHTTPError, requests.exceptions.Timeout, requests.exceptions.RequestException, Exception),
207
  max_tries=8,
208
  base=300,
209
  max_value=3600,
210
+ giveup=lambda e: not is_retryable_error(e),
211
  on_backoff=lambda details: print(
212
+ f" {get_error_type(details['exception'])} error. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/8..."
213
  )
214
  )
215
  def upload_folder_with_backoff(api, **kwargs):
216
+ """Wrapper for api.upload_folder() with exponential backoff for retryable errors."""
217
  return api.upload_folder(**kwargs)
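The retry wrappers above all defer to is_retryable_error / get_error_type; a small illustration with synthetic exceptions (a real HfHubHTTPError carries a response object, so a plain timeout and a generic error are used here):

import requests

timeout_err = requests.exceptions.ReadTimeout()
other_err = ValueError("bad input")
# A network timeout is retried; an unrelated error gives up immediately.
print(is_retryable_error(timeout_err), get_error_type(timeout_err))  # True Timeout
print(is_retryable_error(other_err), get_error_type(other_err))      # False Unknown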
218
 
219
 
220
+ def get_duckdb_connection():
221
  """
222
+ Initialize DuckDB connection with JSON support.
223
 
224
+ Returns:
225
+ DuckDB connection object
226
  """
227
+ conn = duckdb.connect(':memory:')
 
 
 
 
 
 
 
228
 
229
+ # Enable JSON extension if needed
230
+ conn.execute("INSTALL json;")
231
+ conn.execute("LOAD json;")
232
 
233
+ return conn
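A quick way to exercise the connection before running the full mining query; the hourly archive file name is hypothetical and only needs to exist under the local GHArchive directory (read_json_auto reads gzipped, newline-delimited JSON directly):

conn = get_duckdb_connection()
# Count events by type in a single hourly archive file.
rows = conn.execute(
    "SELECT type, COUNT(*) AS n "
    "FROM read_json_auto('../gharchive/data/2024-11-15-0.json.gz', ignore_errors=true) "
    "GROUP BY type ORDER BY n DESC"
).fetchall()
for event_type, n in rows:
    print(event_type, n)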
 
234
 
 
 
235
 
236
+ def generate_file_path_patterns(start_date, end_date, data_dir=GHARCHIVE_DATA_DIR):
 
 
 
 
 
237
  """
238
+ Generate file path patterns for GHArchive data in date range.
 
239
 
240
  Args:
241
  start_date: Start datetime
242
  end_date: End datetime
243
+ data_dir: Directory containing GHArchive data files
244
 
245
  Returns:
246
+ List of file path patterns (one per day)
247
  """
248
+ file_patterns = []
 
 
 
249
 
250
+ current_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
251
+ end_day = end_date.replace(hour=0, minute=0, second=0, microsecond=0)
252
 
253
+ while current_date <= end_day:
254
+ # Pattern for all hours in this day: 2024-11-15-*.json.gz
255
+ pattern = os.path.join(data_dir, f"{current_date.strftime('%Y-%m-%d')}-*.json.gz")
256
+ file_patterns.append(pattern)
257
 
258
+ # Move to next day
259
+ current_date += timedelta(days=1)
 
 
 
260
 
261
+ return file_patterns
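Usage sketch for the helper above, assuming the default GHARCHIVE_DATA_DIR; each day in the range contributes one wildcard pattern covering its 24 hourly archives:

from datetime import datetime, timezone

start = datetime(2024, 11, 15, tzinfo=timezone.utc)
end = datetime(2024, 11, 17, tzinfo=timezone.utc)
patterns = generate_file_path_patterns(start, end)
print(patterns)
# ['../gharchive/data/2024-11-15-*.json.gz',
#  '../gharchive/data/2024-11-16-*.json.gz',
#  '../gharchive/data/2024-11-17-*.json.gz']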
 
 
262
 
263
 
264
  # =============================================================================
265
+ # DUCKDB QUERY FUNCTIONS
266
  # =============================================================================
267
 
268
+ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  """
270
+ Fetch PR review metadata for ALL agents using ONE comprehensive DuckDB query.
 
 
 
 
 
 
271
 
 
 
 
 
 
 
 
 
272
  This query combines:
273
  1. Review events (PullRequestReviewEvent) for all agents
274
  2. PR status (PullRequestEvent with action='closed')
275
+
276
  Args:
277
+ conn: DuckDB connection instance
278
  identifiers: List of GitHub usernames/bot identifiers
279
  start_date: Start datetime (timezone-aware)
280
  end_date: End datetime (timezone-aware)
281
+
282
  Returns:
283
  Dictionary mapping agent identifier to list of PR metadata:
284
  {
 
294
  ...
295
  }
296
  """
297
+ print(f"Querying DuckDB for ALL {len(identifiers)} agents in ONE QUERY")
298
  print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
299
+
300
+ # Generate file path patterns for review period
301
+ review_patterns = generate_file_path_patterns(start_date, end_date)
302
+
303
+ # Generate file path patterns for PR status (use same lookback as reviews)
304
  status_start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
305
+ status_patterns = generate_file_path_patterns(status_start_date, end_date)
306
+
307
  # Build identifier list for IN clause
308
  identifier_list = ', '.join([f"'{id}'" for id in identifiers])
309
+
310
+ # Build comprehensive query with CTEs using parameterized file lists
311
  query = f"""
312
  WITH review_events AS (
313
  -- Get all review events for ALL agents
314
  SELECT
315
+ json_extract_string(payload, '$.pull_request.html_url') as url,
316
  COALESCE(
317
+ json_extract_string(payload, '$.review.submitted_at'),
318
+ CAST(created_at AS VARCHAR)
319
  ) as reviewed_at,
320
+ json_extract_string(actor, '$.login') as reviewer,
321
+ json_extract_string(repo, '$.name') as repo_name,
322
+ CAST(json_extract_string(payload, '$.pull_request.number') AS INTEGER) as pr_number
323
+ FROM read_json_auto($review_patterns, ignore_errors=true, union_by_name=true)
 
 
324
  WHERE
325
  type = 'PullRequestReviewEvent'
326
+ AND json_extract_string(actor, '$.login') IN ({identifier_list})
327
+ AND json_extract_string(payload, '$.pull_request.html_url') IS NOT NULL
328
 
329
  UNION ALL
330
 
331
  -- Get PR comments (IssueCommentEvent on PRs)
332
  SELECT
333
+ json_extract_string(payload, '$.issue.html_url') as url,
334
+ CAST(created_at AS VARCHAR) as reviewed_at,
335
+ json_extract_string(actor, '$.login') as reviewer,
336
+ json_extract_string(repo, '$.name') as repo_name,
337
+ CAST(json_extract_string(payload, '$.issue.number') AS INTEGER) as pr_number
338
+ FROM read_json_auto($review_patterns, ignore_errors=true, union_by_name=true)
 
 
339
  WHERE
340
  type = 'IssueCommentEvent'
341
+ AND json_extract_string(actor, '$.login') IN ({identifier_list})
342
+ AND json_extract_string(payload, '$.issue.pull_request.url') IS NOT NULL
343
+ AND json_extract_string(payload, '$.issue.html_url') IS NOT NULL
344
 
345
  UNION ALL
346
 
347
  -- Get review comments (PullRequestReviewCommentEvent)
348
  SELECT
349
+ json_extract_string(payload, '$.pull_request.html_url') as url,
350
+ CAST(created_at AS VARCHAR) as reviewed_at,
351
+ json_extract_string(actor, '$.login') as reviewer,
352
+ json_extract_string(repo, '$.name') as repo_name,
353
+ CAST(json_extract_string(payload, '$.pull_request.number') AS INTEGER) as pr_number
354
+ FROM read_json_auto($review_patterns, ignore_errors=true, union_by_name=true)
 
 
355
  WHERE
356
  type = 'PullRequestReviewCommentEvent'
357
+ AND json_extract_string(actor, '$.login') IN ({identifier_list})
358
+ AND json_extract_string(payload, '$.pull_request.html_url') IS NOT NULL
359
  ),
360
+
361
  pr_status AS (
362
  -- Get merge/close status for those PRs
363
  SELECT
364
+ json_extract_string(payload, '$.pull_request.html_url') as url,
365
+ CAST(json_extract_string(payload, '$.pull_request.merged') AS BOOLEAN) as is_merged,
366
+ json_extract_string(payload, '$.pull_request.merged_at') as merged_at,
367
+ json_extract_string(payload, '$.pull_request.closed_at') as closed_at,
368
+ created_at,
369
+ ROW_NUMBER() OVER (PARTITION BY json_extract_string(payload, '$.pull_request.html_url') ORDER BY created_at DESC) as rn
370
+ FROM read_json_auto($status_patterns, ignore_errors=true, union_by_name=true)
 
371
  WHERE
372
  type = 'PullRequestEvent'
373
+ AND json_extract_string(payload, '$.action') = 'closed'
374
+ AND json_extract_string(payload, '$.pull_request.html_url') IS NOT NULL
375
+ AND json_extract_string(payload, '$.pull_request.html_url') IN (
376
  SELECT DISTINCT url FROM review_events
377
  )
 
378
  )
379
+
380
  -- Join review events with PR status
381
  SELECT DISTINCT
382
  re.reviewer,
 
385
  ps.merged_at,
386
  ps.closed_at
387
  FROM review_events re
388
+ LEFT JOIN (SELECT * FROM pr_status WHERE rn = 1) ps ON re.url = ps.url
389
  ORDER BY re.reviewer, re.reviewed_at DESC
390
  """
391
+
392
  # Calculate number of days for reporting
393
  review_days = (end_date - start_date).days
394
  status_days = (end_date - status_start_date).days
395
+
396
  print(f" Querying {review_days} days for reviews, {status_days} days for PR status...")
397
  print(f" Agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}")
398
+
399
  try:
400
+ # Execute query with parameters
401
+ results = conn.execute(query, {'review_patterns': review_patterns, 'status_patterns': status_patterns}).fetchall()
402
 
403
+ print(f" Found {len(results)} total PR review records across all agents")
 
 
 
 
404
 
405
+ # Group results by agent
406
+ metadata_by_agent = defaultdict(list)
407
 
408
+ for row in results:
409
+ reviewer = row[0]
410
+ url = row[1]
411
+ reviewed_at = normalize_date_format(row[2]) if row[2] else None
412
+ merged_at = normalize_date_format(row[3]) if row[3] else None
413
+ closed_at = normalize_date_format(row[4]) if row[4] else None
414
 
415
  metadata_by_agent[reviewer].append({
416
+ 'url': url,
417
  'reviewed_at': reviewed_at,
418
  'merged_at': merged_at,
419
  'closed_at': closed_at,
420
  })
421
+
422
  # Print breakdown by agent
423
+ print(f"Results breakdown by agent:")
424
  for identifier in identifiers:
425
  count = len(metadata_by_agent.get(identifier, []))
426
  if count > 0:
 
429
  closed_count = sum(1 for m in metadata if m['closed_at'] is not None and m['merged_at'] is None)
430
  open_count = count - merged_count - closed_count
431
  print(f" {identifier}: {count} PRs ({merged_count} merged, {closed_count} closed, {open_count} open)")
432
+
433
  # Convert defaultdict to regular dict
434
  return dict(metadata_by_agent)
435
+
436
  except Exception as e:
437
+ print(f" DuckDB error: {str(e)}")
438
  import traceback
439
  traceback.print_exc()
440
  return {}
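Note on helpers referenced above: read_json_auto is fed two parameter lists, $review_patterns and $status_patterns, and every timestamp passes through normalize_date_format; neither the pattern construction nor the normalizer is visible in this hunk. A minimal sketch of what they could look like, assuming a local GHArchive mirror with the standard YYYY-MM-DD-H.json.gz hourly file names (the directory name and accepted timestamp formats are assumptions, not taken from this commit):

from datetime import datetime, timedelta, timezone

GHARCHIVE_DIR = "gharchive"  # assumed local mirror of GHArchive dumps

def build_hourly_patterns(start_date, end_date):
    """Hypothetical: one glob per day covering all 24 hourly dumps."""
    patterns = []
    day = start_date
    while day < end_date:
        patterns.append(f"{GHARCHIVE_DIR}/{day:%Y-%m-%d}-*.json.gz")
        day += timedelta(days=1)
    return patterns

def normalize_date_format(value):
    """Hypothetical: coerce GHArchive timestamps to ISO 8601 UTC strings."""
    if isinstance(value, datetime):
        return value.astimezone(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    text = str(value).strip()
    for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d %H:%M:%S%z', '%Y-%m-%d %H:%M:%S'):
        try:
            parsed = datetime.strptime(text, fmt)
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=timezone.utc)
            return parsed.astimezone(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        except ValueError:
            continue
    return text  # leave unrecognized formats untouched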
441
 
442
 
443
  # =============================================================================
444
+ # HUGGINGFACE STORAGE FUNCTIONS WITH BATCH UPLOAD
445
  # =============================================================================
446
 
447
  def group_metadata_by_date(metadata_list):
 
466
  return dict(grouped)
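The body of group_metadata_by_date is collapsed in this hunk; judging from how its result is consumed below (keys unpacked as (review_year, month, day) and written to {year}.{month:02d}.{day:02d}.jsonl files), a grouping along these lines would fit. This is an illustrative sketch, not the committed implementation:

from collections import defaultdict
from datetime import datetime

def group_metadata_by_date_sketch(metadata_list):
    """Group PR review records by the calendar day of 'reviewed_at'."""
    grouped = defaultdict(list)
    for entry in metadata_list:
        reviewed_at = entry.get('reviewed_at')
        if not reviewed_at:
            continue
        dt = datetime.strptime(reviewed_at[:10], '%Y-%m-%d')
        grouped[(dt.year, dt.month, dt.day)].append(entry)
    return dict(grouped)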
467
 
468
 
469
+ def upload_single_file_with_retry(api, local_path, repo_path, repo_id, repo_type, commit_message, max_retries=MAX_RETRIES):
470
  """
471
+ Upload a single file with exponential backoff retry logic.
472
 
473
  Args:
474
+ api: HfApi instance
475
+ local_path: Local file path
476
+ repo_path: Path in repository
477
+ repo_id: Repository ID
478
+ repo_type: Repository type (e.g., "dataset")
479
+ commit_message: Commit message
480
+ max_retries: Maximum number of retries
481
+
482
+ Returns:
483
+ bool: True if successful, False otherwise
484
+ """
485
+ for attempt in range(max_retries):
486
+ try:
487
+ upload_file_with_backoff(
488
+ api=api,
489
+ path_or_fileobj=local_path,
490
+ path_in_repo=repo_path,
491
+ repo_id=repo_id,
492
+ repo_type=repo_type,
493
+ commit_message=commit_message
494
+ )
495
+ return True
496
+ except Exception as e:
497
+ error_type = get_error_type(e)
498
+ if attempt < max_retries - 1:
499
+ # Calculate exponential backoff
500
+ wait_time = min(INITIAL_BACKOFF * (2 ** attempt), MAX_BACKOFF)
501
+ print(f" {error_type} error on attempt {attempt + 1}/{max_retries}. Retrying in {wait_time}s...")
502
+ time.sleep(wait_time)
503
+ else:
504
+ print(f" Failed after {max_retries} attempts: {str(e)}")
505
+ return False
506
+ return False
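For reference, the backoff here grows geometrically and is capped at MAX_BACKOFF; the constants themselves are defined elsewhere in msr.py and are not shown in this hunk. Assuming, purely for illustration, INITIAL_BACKOFF = 2, MAX_BACKOFF = 60, and MAX_RETRIES = 5, the sleeps between attempts would be:

INITIAL_BACKOFF = 2   # assumed for illustration; real value defined elsewhere in msr.py
MAX_BACKOFF = 60      # assumed for illustration
MAX_RETRIES = 5       # assumed for illustration

# Sleeps happen only while attempt < MAX_RETRIES - 1, mirroring the guard above.
delays = [min(INITIAL_BACKOFF * (2 ** attempt), MAX_BACKOFF) for attempt in range(MAX_RETRIES - 1)]
print(delays)  # -> [2, 4, 8, 16]; the final attempt fails without sleeping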
507
+
508
+
509
+ def batch_upload_review_metadata(all_metadata):
510
  """
511
+ Upload review metadata for all agents with time gaps between uploads.
512
+ Each agent's data is uploaded as separate daily files with retry logic.
513
 
514
+ Args:
515
+ all_metadata: Dictionary mapping agent identifier to list of PR metadata
516
+
517
+ Returns:
518
+ tuple: (success_count, error_count)
519
+ """
520
  try:
521
  token = get_hf_token()
522
  if not token:
 
524
 
525
  api = HfApi(token=token)
526
 
527
+ success_count = 0
528
+ error_count = 0
529
+ total_files = 0
530
 
531
+ # First, calculate total number of files to upload
532
+ for agent_identifier, metadata_list in all_metadata.items():
533
+ if metadata_list:
534
+ grouped = group_metadata_by_date(metadata_list)
535
+ total_files += len(grouped)
536
 
537
+ print(f"\n{'='*80}")
538
+ print(f"Starting batch upload: {len(all_metadata)} agents, {total_files} total files")
539
+ print(f"Upload delay: {UPLOAD_DELAY_SECONDS}s between files")
540
+ print(f"{'='*80}\n")
541
 
542
+ file_count = 0
 
543
 
544
+ for agent_idx, (agent_identifier, metadata_list) in enumerate(all_metadata.items(), 1):
545
+ if not metadata_list:
546
+ print(f"[{agent_idx}/{len(all_metadata)}] Skipping {agent_identifier} (no data)")
547
+ continue
548
 
549
+ # Group by date
550
+ grouped = group_metadata_by_date(metadata_list)
551
 
552
+ print(f"[{agent_idx}/{len(all_metadata)}] Uploading {len(grouped)} files for {agent_identifier}...")
 
 
553
 
554
+ # Create temporary files for this agent
555
+ agent_temp_dir = tempfile.mkdtemp()
556
 
557
+ try:
558
+ # Prepare all files locally
559
+ local_files = []
560
+ for (review_year, month, day), day_metadata in grouped.items():
561
+ filename = f"{review_year}.{month:02d}.{day:02d}.jsonl"
562
+ local_path = os.path.join(agent_temp_dir, filename)
563
+ repo_path = f"{agent_identifier}/{filename}"
564
+
565
+ # Sort by reviewed_at for better organization
566
+ day_metadata.sort(key=lambda x: x.get('reviewed_at', ''), reverse=True)
567
+
568
+ # Save to temp file
569
+ save_jsonl(local_path, day_metadata)
570
+ local_files.append((local_path, repo_path, len(day_metadata)))
571
+
572
+ # Upload each file with delay
573
+ agent_success = 0
574
+ agent_error = 0
575
+
576
+ for file_idx, (local_path, repo_path, review_count) in enumerate(local_files, 1):
577
+ file_count += 1
578
+
579
+ print(f" [{file_count}/{total_files}] Uploading {repo_path} ({review_count} reviews)...", end='')
580
+
581
+ if upload_single_file_with_retry(
582
+ api=api,
583
+ local_path=local_path,
584
+ repo_path=repo_path,
585
+ repo_id=REVIEW_METADATA_REPO,
586
+ repo_type="dataset",
587
+ commit_message=f"Update {repo_path}",
588
+ max_retries=MAX_RETRIES
589
+ ):
590
+ print(" ")
591
+ agent_success += 1
592
+ success_count += 1
593
+ else:
594
+ print(" ")
595
+ agent_error += 1
596
+ error_count += 1
597
+
598
+ # Add delay between uploads (except for last file)
599
+ if file_idx < len(local_files):
600
+ time.sleep(UPLOAD_DELAY_SECONDS)
601
+
602
+ print(f" Agent {agent_identifier}: {agent_success} uploaded, {agent_error} errors\n")
603
+
604
+ finally:
605
+ # Clean up temp directory
606
+ if os.path.exists(agent_temp_dir):
607
+ import shutil
608
+ shutil.rmtree(agent_temp_dir)
609
 
610
+ print(f"\n{'='*80}")
611
+ print(f"Batch upload complete!")
612
+ print(f" Total files: {total_files}")
613
+ print(f" Successful: {success_count}")
614
+ print(f" Errors: {error_count}")
615
+ print(f"{'='*80}\n")
616
+
617
+ return success_count, error_count
618
 
619
  except Exception as e:
620
+ print(f"Error during batch upload: {str(e)}")
621
  import traceback
622
  traceback.print_exc()
623
+ return 0, total_files if 'total_files' in locals() else 0
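To spot-check an upload, a daily file can be pulled back from the dataset with hf_hub_download; the agent folder and date below are placeholders, and a token may be required if the dataset is private:

from huggingface_hub import hf_hub_download
import json

# REVIEW_METADATA_REPO is the module-level constant used above.
# Placeholders: substitute an agent identifier and date that actually exist in the repo.
path = hf_hub_download(
    repo_id=REVIEW_METADATA_REPO,
    filename="some-review-bot/2025.01.15.jsonl",
    repo_type="dataset",
)
with open(path, 'r', encoding='utf-8') as f:
    records = [json.loads(line) for line in f if line.strip()]
print(f"{len(records)} reviews in this daily file")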
624
 
625
 
626
  def load_agents_from_hf():
 
667
  print(f"Warning: Could not load {json_file}: {str(e)}")
668
  continue
669
 
670
+ print(f"Loaded {len(agents)} agents from HuggingFace")
671
  return agents
672
 
673
  except Exception as e:
 
714
  except Exception:
715
  continue
716
 
717
+ print(f"Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files)...")
718
 
719
  all_metadata = []
720
 
 
743
  except Exception as e:
744
  print(f" Warning: Could not load {filename}: {str(e)}")
745
 
746
+ print(f"Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")
747
  return all_metadata
748
 
749
  except Exception as e:
750
+ print(f"Error loading review metadata: {str(e)}")
751
  return []
752
 
753
 
 
909
  Returns:
910
  Dictionary of agent stats.
911
  """
912
+ print("Constructing leaderboard from review metadata...")
913
 
914
  # Load agents
915
  agents = load_agents_from_hf()
916
  if not agents:
917
+ print("No agents found")
918
  return {}
919
 
920
+ print(f"Loaded {len(agents)} agents")
921
 
922
  # Load all review metadata
923
  all_metadata = load_review_metadata()
924
+ print(f"Loaded {len(all_metadata)} review metadata entries")
925
 
926
  cache_dict = {}
927
 
 
936
  stats = calculate_review_stats_from_metadata(bot_metadata)
937
 
938
  cache_dict[identifier] = {
 
939
  'name': agent_name,
940
  'website': agent.get('website', 'N/A'),
941
  'github_identifier': identifier,
942
  **stats
943
  }
944
 
945
+ print(f"Constructed cache with {len(cache_dict)} agent entries")
946
 
947
  return cache_dict
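calculate_review_stats_from_metadata is called above but its body is not part of this hunk. Given the merged/closed/open breakdown printed during mining, a stats helper of roughly this shape would fit; the key names and the acceptance-rate definition (merged over merged-plus-closed) are assumptions:

def calculate_review_stats_sketch(metadata_list):
    """Hypothetical: derive summary stats from per-PR review metadata."""
    total = len(metadata_list)
    merged = sum(1 for m in metadata_list if m.get('merged_at'))
    closed = sum(1 for m in metadata_list if m.get('closed_at') and not m.get('merged_at'))
    decided = merged + closed
    acceptance_rate = round(100.0 * merged / decided, 2) if decided else 0.0
    return {
        'total_reviews': total,
        'merged_prs': merged,
        'closed_prs': closed,
        'open_prs': total - decided,
        'acceptance_rate': acceptance_rate,
    }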
948
 
 
981
  json.dump(combined_data, f, indent=2)
982
 
983
  try:
984
+ # Upload to HuggingFace with retry logic
985
+ print(f"Uploading leaderboard data...", end='')
986
  upload_file_with_backoff(
987
  api=api,
988
  path_or_fileobj=filename,
 
990
  repo_id=LEADERBOARD_REPO,
991
  repo_type="dataset"
992
  )
993
+ print(" ")
994
+ print(f"Saved leaderboard data to HuggingFace: {filename}")
995
  return True
996
  finally:
997
  # Always clean up local file
 
999
  os.remove(filename)
1000
 
1001
  except Exception as e:
1002
+ print(f" ")
1003
+ print(f"Error saving leaderboard data: {str(e)}")
1004
  import traceback
1005
  traceback.print_exc()
1006
  return False
 
1013
  def mine_all_agents():
1014
  """
1015
  Mine review metadata for all agents within LEADERBOARD_TIME_FRAME_DAYS and save to HuggingFace.
1016
+ Uses ONE DuckDB query for ALL agents, then batch uploads with time gaps.
1017
  """
1018
  # Load agent metadata from HuggingFace
1019
  agents = load_agents_from_hf()
1020
  if not agents:
1021
  print("No agents found in HuggingFace dataset")
1022
  return
1023
+
1024
  # Extract all identifiers
1025
  identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
1026
  if not identifiers:
1027
  print("No valid agent identifiers found")
1028
  return
1029
+
1030
+ print(f"{'='*80}")
1031
  print(f"Starting review metadata mining for {len(identifiers)} agents")
1032
  print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
1033
+ print(f"Data source: DuckDB + Local GHArchive (SINGLE QUERY)")
1034
+ print(f"{'='*80}")
1035
+
1036
+ # Initialize DuckDB connection
1037
  try:
1038
+ conn = get_duckdb_connection()
1039
  except Exception as e:
1040
+ print(f"Failed to initialize DuckDB connection: {str(e)}")
1041
  return
1042
+
1043
  # Define time range: past LEADERBOARD_TIME_FRAME_DAYS (excluding today)
1044
  current_time = datetime.now(timezone.utc)
1045
  end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
1046
  start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
1047
+
1048
  try:
1049
+ # Use single query for all agents
1050
+ all_metadata = fetch_all_pr_metadata_single_query(
1051
+ conn, identifiers, start_date, end_date
 
1052
  )
1053
 
1054
  # Calculate summary statistics
 
1056
  agents_with_data = sum(1 for metadata_list in all_metadata.values() if metadata_list)
1057
 
1058
  print(f"\n{'='*80}")
1059
+ print(f"DuckDB query complete!")
1060
  print(f" Total agents: {len(agents)}")
1061
  print(f" Agents with data: {agents_with_data}")
1062
  print(f" Total PRs found: {total_prs}")
1063
+ print(f"{'='*80}")
1064
 
1065
  except Exception as e:
1066
+ print(f"Error during DuckDB fetch: {str(e)}")
1067
  import traceback
1068
  traceback.print_exc()
1069
  return
1070
+ finally:
1071
+ # Close DuckDB connection
1072
+ conn.close()
1073
+
1074
+ # Batch upload review metadata with time gaps
1075
+ success_count, error_count = batch_upload_review_metadata(all_metadata)
1076
 
1077
  # Construct and save leaderboard data
1078
+ print(f"{'='*80}")
1079
+ print(f"Constructing and saving leaderboard data...")
1080
  print(f"{'='*80}\n")
1081
 
1082
  try:
 
1084
  leaderboard_dict = construct_leaderboard_from_metadata()
1085
 
1086
  # Calculate monthly metrics
1087
+ print(f"Calculating monthly metrics...")
1088
  monthly_metrics = calculate_monthly_metrics_by_agent()
1089
 
1090
  # Save to HuggingFace
1091
+ print(f"Saving leaderboard data to HuggingFace...")
1092
  save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
1093
 
1094
  print(f"\n{'='*80}")
1095
+ print(f"ALL TASKS COMPLETE!")
1096
+ print(f" Review metadata: {success_count} files uploaded, {error_count} errors")
1097
  print(f" Leaderboard entries: {len(leaderboard_dict)}")
1098
  print(f" Monthly data points: {len(monthly_metrics.get('months', []))} months")
1099
  print(f" Saved to: {LEADERBOARD_REPO}/swe-review.json")
1100
+ print(f"{'='*80}")
1101
 
1102
  except Exception as e:
1103
+ print(f"Failed to construct/save leaderboard data: {str(e)}")
1104
  import traceback
1105
  traceback.print_exc()
1106
 
 
1110
  # =============================================================================
1111
 
1112
  if __name__ == "__main__":
1113
+ mine_all_agents()
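The entry point above runs a single mining pass and exits. Since APScheduler remains in requirements.txt (see the diff below), a recurring run could be wired up roughly as follows; the daily 00:00 UTC schedule is an assumption, not something this commit configures:

from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger

def run_daily():
    # Assumed schedule: one mining pass per day at 00:00 UTC.
    scheduler = BlockingScheduler(timezone="UTC")
    scheduler.add_job(mine_all_agents, CronTrigger(hour=0, minute=0))
    scheduler.start()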
requirements.txt CHANGED
@@ -1,12 +1,10 @@
1
  APScheduler
2
  backoff
3
- datasets
4
- db-dtypes
5
- google-cloud-bigquery
6
  gradio
7
  gradio_leaderboard
8
  huggingface_hub
9
  pandas
10
  plotly
11
- PyGithub
12
- python-dotenv
 
1
  APScheduler
2
  backoff
3
+ duckdb
4
  gradio
5
  gradio_leaderboard
6
  huggingface_hub
7
  pandas
8
  plotly
9
+ python-dotenv
10
+ requests