zhimin-z committed · Commit 717cb54
1 Parent(s): a1b9ad1

README.md CHANGED
@@ -28,6 +28,8 @@ If an assistant can consistently provide valuable reviews across different projects
 Key metrics from the last 180 days:

 **Leaderboard Table**
+- **Assistant**: Display name of the assistant
+- **Website**: Link to the assistant's homepage or documentation
 - **Total Reviews**: PR reviews the assistant has made
 - **Merged PRs**: PRs reviewed by the assistant that were merged
 - **Acceptance Rate**: Percentage of reviewed PRs that were merged
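The three table metrics are related by a single ratio. A minimal sketch of that relationship (hypothetical helper, not code from this repo):

```python
def acceptance_rate(merged_prs: int, total_reviews: int) -> float:
    """Percentage of reviewed PRs that were merged; 0.0 when nothing was reviewed."""
    if total_reviews == 0:
        return 0.0
    return 100.0 * merged_prs / total_reviews

# Example: 42 merged PRs out of 60 reviews -> 70.0%
assert acceptance_rate(42, 60) == 70.0
```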
app.py CHANGED
@@ -22,13 +22,13 @@ load_dotenv()
 # CONFIGURATION
 # =============================================================================

-AGENTS_REPO = "SWE-Arena/bot_metadata" # HuggingFace dataset for …
+AGENTS_REPO = "SWE-Arena/bot_metadata" # HuggingFace dataset for assistant metadata
 LEADERBOARD_FILENAME = f"{os.getenv('COMPOSE_PROJECT_NAME')}.json"
 LEADERBOARD_REPO = "SWE-Arena/leaderboard_data" # HuggingFace dataset for leaderboard data
 MAX_RETRIES = 5

 LEADERBOARD_COLUMNS = [
-    ("…
+    ("Assistant", "string"),
     ("Website", "string"),
     ("Total Reviews", "number"),
     ("Merged PRs", "number"),

@@ -96,10 +96,10 @@ def validate_github_username(identifier):
 # =============================================================================

 def load_agents_from_hf():
-    """Load all …
+    """Load all assistant metadata JSON files from HuggingFace dataset."""
     try:
         api = HfApi()
-        …
+        assistants = []

         # List all files in the repository
         files = list_repo_files_with_backoff(api=api, repo_id=AGENTS_REPO, repo_type="dataset")
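`list_repo_files_with_backoff` is referenced but not defined in this diff; a plausible sketch, assuming it wraps `HfApi.list_repo_files` with exponential backoff up to `MAX_RETRIES` attempts:

```python
import time
from huggingface_hub import HfApi

MAX_RETRIES = 5  # mirrors the constant defined above

def list_repo_files_with_backoff(api: HfApi, repo_id: str, repo_type: str):
    """Retry transient Hub errors with exponential backoff (sketch, not the repo's code)."""
    for attempt in range(MAX_RETRIES):
        try:
            return api.list_repo_files(repo_id=repo_id, repo_type=repo_type)
        except Exception:
            if attempt == MAX_RETRIES - 1:
                raise
            time.sleep(2 ** attempt)  # 1s, 2s, 4s, 8s between attempts
```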
@@ -119,7 +119,7 @@ def load_agents_from_hf():
                 with open(file_path, 'r') as f:
                     agent_data = json.load(f)

-                # Only process …
+                # Only process assistants with status == "active"
                 if agent_data.get('status') != 'active':
                     continue


@@ -129,17 +129,17 @@ def load_agents_from_hf():
                 # Add or override github_identifier to match filename
                 agent_data['github_identifier'] = filename_identifier

-                …
+                assistants.append(agent_data)

             except Exception as e:
                 print(f"Warning: Could not load {json_file}: {str(e)}")
                 continue

-        print(f"Loaded {len(…
+        print(f"Loaded {len(assistants)} assistants from HuggingFace")
-        return …
+        return assistants

     except Exception as e:
-        print(f"Could not load …
+        print(f"Could not load assistants from HuggingFace: {str(e)}")
         return None


@@ -195,7 +195,7 @@ def upload_with_retry(api, path_or_fileobj, path_in_repo, repo_id, repo_type, to…


 def save_agent_to_hf(data):
-    """Save a new …
+    """Save a new assistant to HuggingFace dataset as {identifier}.json in root."""
     try:
         api = HfApi()
         token = get_hf_token()

@@ -220,7 +220,7 @@ def save_agent_to_hf(data):
             repo_type="dataset",
             token=token
         )
-        print(f"Saved …
+        print(f"Saved assistant to HuggingFace: {filename}")
         return True
     finally:
         # Always clean up local file, even if upload fails

@@ -228,7 +228,7 @@ def save_agent_to_hf(data):
             os.remove(filename)

     except Exception as e:
-        print(f"Error saving …
+        print(f"Error saving assistant: {str(e)}")
         return False

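The three hunks above only reword `save_agent_to_hf`'s docstring and log strings; for context, a sketch of the save-upload-cleanup pattern they sit in, assuming `upload_with_retry` ultimately calls `HfApi.upload_file`:

```python
import json
import os
from huggingface_hub import HfApi

def save_agent_to_hf_sketch(api: HfApi, data: dict, token: str) -> bool:
    """Write {identifier}.json locally, upload it to the dataset repo, then clean up."""
    filename = f"{data['github_identifier']}.json"
    try:
        with open(filename, 'w') as f:
            json.dump(data, f, indent=2)
        try:
            api.upload_file(
                path_or_fileobj=filename,
                path_in_repo=filename,
                repo_id="SWE-Arena/bot_metadata",
                repo_type="dataset",
                token=token,
            )
            print(f"Saved assistant to HuggingFace: {filename}")
            return True
        finally:
            # Always clean up the local file, even if the upload fails
            if os.path.exists(filename):
                os.remove(filename)
    except Exception as e:
        print(f"Error saving assistant: {str(e)}")
        return False
```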
@@ -275,10 +275,10 @@ def create_monthly_metrics_plot(top_n=5):
     - Left y-axis: Acceptance Rate (%) as line curves
     - Right y-axis: Total Reviews created as bar charts

-    Each …
+    Each assistant gets a unique color for both their line and bars.

     Args:
-        top_n: Number of top …
+        top_n: Number of top assistants to show (default: 5)
     """
     # Load from saved dataset
     saved_data = load_leaderboard_data_from_hf()

@@ -303,10 +303,10 @@ def create_monthly_metrics_plot(top_n=5):
     print(f"Loaded monthly metrics from saved dataset")

     # Apply top_n filter if specified
-    if top_n is not None and top_n > 0 and metrics.get('…
+    if top_n is not None and top_n > 0 and metrics.get('assistants'):
-        # Calculate total reviews for each …
+        # Calculate total reviews for each assistant
         agent_totals = []
-        for agent_name in metrics['…
+        for agent_name in metrics['assistants']:
             agent_data = metrics['data'].get(agent_name, {})
             total_reviews = sum(agent_data.get('total_reviews', []))
             agent_totals.append((agent_name, total_reviews))

@@ -315,14 +315,14 @@ def create_monthly_metrics_plot(top_n=5):
         agent_totals.sort(key=lambda x: x[1], reverse=True)
         top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]

-        # Filter metrics to only include top …
+        # Filter metrics to only include top assistants
         metrics = {
-            '…
+            'assistants': top_agents,
             'months': metrics['months'],
-            'data': {…
+            'data': {assistant: metrics['data'][assistant] for assistant in top_agents if assistant in metrics['data']}
         }

-    if not metrics['…
+    if not metrics['assistants'] or not metrics['months']:
         # Return an empty figure with a message
         fig = go.Figure()
         fig.add_annotation(

@@ -341,7 +341,7 @@ def create_monthly_metrics_plot(top_n=5):
     # Create figure with secondary y-axis
     fig = make_subplots(specs=[[{"secondary_y": True}]])

-    # Generate unique colors for many …
+    # Generate unique colors for many assistants using HSL color space
     def generate_color(index, total):
         """Generate distinct colors using HSL color space for better distribution"""
         hue = (index * 360 / total) % 360

@@ -349,15 +349,15 @@ def create_monthly_metrics_plot(top_n=5):
         lightness = 45 + (index % 2) * 10 # Vary lightness slightly
         return f'hsl({hue}, {saturation}%, {lightness}%)'

-    …
+    assistants = metrics['assistants']
     months = metrics['months']
     data = metrics['data']

-    # Generate colors for all …
+    # Generate colors for all assistants
-    agent_colors = {…
+    agent_colors = {assistant: generate_color(idx, len(assistants)) for idx, assistant in enumerate(assistants)}

-    # Add traces for each …
+    # Add traces for each assistant
-    for idx, agent_name in enumerate(…
+    for idx, agent_name in enumerate(assistants):
         color = agent_colors[agent_name]
         agent_data = data[agent_name]

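`generate_color` spaces hues evenly around the HSL wheel so adjacent traces stay visually distinct. Reconstructed as a standalone sketch; the saturation constant is an assumption, since that line is clipped in the diff view:

```python
def generate_color(index: int, total: int) -> str:
    """Generate distinct colors using HSL color space for better distribution."""
    hue = (index * 360 / total) % 360
    saturation = 70                    # assumed value; not visible in the diff
    lightness = 45 + (index % 2) * 10  # vary lightness slightly
    return f'hsl({hue}, {saturation}%, {lightness}%)'

# Five assistants get hues spaced 72 degrees apart: 0, 72, 144, 216, 288
print([generate_color(i, 5) for i in range(5)])
```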
@@ -377,8 +377,8 @@ def create_monthly_metrics_plot(top_n=5):
                 line=dict(color=color, width=2),
                 marker=dict(size=8),
                 legendgroup=agent_name,
-                showlegend=(top_n is not None and top_n <= 10), # Show legend for top N …
+                showlegend=(top_n is not None and top_n <= 10), # Show legend for top N assistants
-                hovertemplate='<b>…
+                hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
                               'Month: %{x}<br>' +
                               'Acceptance Rate: %{y:.2f}%<br>' +
                               '<extra></extra>'

@@ -387,7 +387,7 @@ def create_monthly_metrics_plot(top_n=5):
         )

         # Add bar trace for total reviews (right y-axis)
-        # Only show bars for months where …
+        # Only show bars for months where assistant has reviews
         x_bars = []
         y_bars = []
         for month, count in zip(months, agent_data['total_reviews']):

@@ -404,11 +404,11 @@ def create_monthly_metrics_plot(top_n=5):
                 marker=dict(color=color, opacity=0.6),
                 legendgroup=agent_name,
                 showlegend=False, # Hide duplicate legend entry (already shown in Scatter)
-                hovertemplate='<b>…
+                hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
                               'Month: %{x}<br>' +
                               'Total Reviews: %{y}<br>' +
                               '<extra></extra>',
-                offsetgroup=agent_name # Group bars by …
+                offsetgroup=agent_name # Group bars by assistant for proper spacing
             ),
             secondary_y=True
         )
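These hunks only touch the hover templates, but they sit inside Plotly's dual-axis pattern. A self-contained sketch with sample data: a Scatter on the left axis and a Bar on the right axis, sharing one `legendgroup` per assistant:

```python
import plotly.graph_objects as go
from plotly.subplots import make_subplots

months = ['2025-01', '2025-02', '2025-03']  # sample data for illustration
rates = [62.5, 70.0, 66.7]                  # acceptance rate, %
reviews = [16, 20, 18]                      # total reviews per month

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(
        x=months, y=rates, name='example-bot', mode='lines+markers',
        legendgroup='example-bot',
        hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
                      'Month: %{x}<br>Acceptance Rate: %{y:.2f}%<extra></extra>'
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Bar(
        x=months, y=reviews, name='example-bot', opacity=0.6,
        legendgroup='example-bot', showlegend=False, offsetgroup='example-bot',
        hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
                      'Month: %{x}<br>Total Reviews: %{y}<extra></extra>'
    ),
    secondary_y=True,
)
fig.update_yaxes(title_text='Acceptance Rate (%)', secondary_y=False)
fig.update_yaxes(title_text='Total Reviews', secondary_y=True)
```

Sharing a `legendgroup` lets one legend entry toggle both traces at once, which is why the Bar sets `showlegend=False`.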
@@ -430,7 +430,7 @@ def create_monthly_metrics_plot(top_n=5):
     show_legend = (top_n is not None and top_n <= 10)
     fig.update_layout(
         title=None,
-        hovermode='closest', # Show individual …
+        hovermode='closest', # Show individual assistant info on hover
         barmode='group',
         height=600,
         showlegend=show_legend,

@@ -468,9 +468,9 @@ def get_leaderboard_dataframe():
     filtered_count = 0
     for identifier, data in cache_dict.items():
         total_reviews = data.get('total_reviews', 0)
-        print(f"…
+        print(f" Assistant '{identifier}': {total_reviews} reviews")

-        # Filter out …
+        # Filter out assistants with zero total reviews
         if total_reviews == 0:
             filtered_count += 1
             continue

@@ -484,8 +484,8 @@ def get_leaderboard_dataframe():
             data.get('acceptance_rate', 0.0),
         ])

-    print(f"Filtered out {filtered_count} …
+    print(f"Filtered out {filtered_count} assistants with 0 reviews")
-    print(f"Leaderboard will show {len(rows)} …
+    print(f"Leaderboard will show {len(rows)} assistants")

     # Create DataFrame
     column_names = [col[0] for col in LEADERBOARD_COLUMNS]
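The DataFrame step these log lines lead into is straightforward; a sketch assuming each entry of `rows` matches `LEADERBOARD_COLUMNS` order (the last column name is inferred from the `ColumnFilter("Acceptance Rate (%)")` visible further down):

```python
import pandas as pd

LEADERBOARD_COLUMNS = [
    ("Assistant", "string"),
    ("Website", "string"),
    ("Total Reviews", "number"),
    ("Merged PRs", "number"),
    ("Acceptance Rate (%)", "number"),  # inferred; this entry is not visible in the diff
]

rows = [["example-bot", "https://example.com", 60, 42, 70.0]]  # illustrative row
column_names = [col[0] for col in LEADERBOARD_COLUMNS]
df = pd.DataFrame(rows, columns=column_names)
```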
@@ -509,14 +509,14 @@ def get_leaderboard_dataframe():

 def submit_agent(identifier, agent_name, organization, website):
     """
-    Submit a new …
+    Submit a new assistant to the leaderboard.
     Validates input and saves submission.
     """
     # Validate required fields
     if not identifier or not identifier.strip():
         return "ERROR: GitHub identifier is required", gr.update()
     if not agent_name or not agent_name.strip():
-        return "ERROR: …
+        return "ERROR: Assistant name is required", gr.update()
     if not organization or not organization.strip():
         return "ERROR: Organization name is required", gr.update()
     if not website or not website.strip():

@@ -533,12 +533,12 @@ def submit_agent(identifier, agent_name, organization, website):
     if not is_valid:
         return f"ERROR: {message}", gr.update()

-    # Check for duplicates by loading …
+    # Check for duplicates by loading assistants from HuggingFace
-    …
+    assistants = load_agents_from_hf()
-    if …
+    if assistants:
-        existing_names = {…
+        existing_names = {assistant['github_identifier'] for assistant in assistants}
         if identifier in existing_names:
-            return f"WARNING: …
+            return f"WARNING: Assistant with identifier '{identifier}' already exists", gr.update()

     # Create submission
     submission = {
@@ -575,7 +575,7 @@ def reload_leaderboard_data():
         if data:
             print(f"Successfully reloaded leaderboard data")
             print(f" Last updated: {data.get('last_updated', 'Unknown')}")
-            print(f"…
+            print(f" Assistants: {len(data.get('leaderboard', {}))}")
         else:
             print(f"No data available")
     except Exception as e:

@@ -588,7 +588,7 @@ def reload_leaderboard_data():
 # GRADIO APPLICATION
 # =============================================================================

-print(f"\nStarting SWE …
+print(f"\nStarting SWE Assistant PR Leaderboard")
 print(f" Data source: {LEADERBOARD_REPO}")
 print(f" Reload frequency: Daily at 12:00 AM UTC\n")

@@ -609,19 +609,19 @@ print(f"On startup: Loads cached data from HuggingFace on demand")
 print(f"{'='*80}\n")

 # Create Gradio interface
-with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
+with gr.Blocks(title="SWE Assistant Review Leaderboard", theme=gr.themes.Soft()) as app:
-    gr.Markdown("# SWE …
+    gr.Markdown("# SWE Assistant Review Leaderboard")
-    gr.Markdown(f"Track and compare GitHub PR review acceptance statistics for SWE …
+    gr.Markdown(f"Track and compare GitHub PR review acceptance statistics for SWE assistants")

     with gr.Tabs():

         # Leaderboard Tab
         with gr.Tab("Leaderboard"):
-            gr.Markdown("*Statistics are based on …
+            gr.Markdown("*Statistics are based on assistant review activity tracked by the system*")
             leaderboard_table = Leaderboard(
                 value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
                 datatype=LEADERBOARD_COLUMNS,
-                search_columns=["…
+                search_columns=["Assistant", "Website"],
                 filter_columns=[
                     ColumnFilter(
                         "Acceptance Rate (%)",

@@ -644,8 +644,8 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
             # Monthly Metrics Section
             gr.Markdown("---") # Divider
             with gr.Group():
-                gr.Markdown("### Monthly Performance - Top 5 …
+                gr.Markdown("### Monthly Performance - Top 5 Assistants")
-                gr.Markdown("*Shows acceptance rate trends and review volumes for the most active …
+                gr.Markdown("*Shows acceptance rate trends and review volumes for the most active assistants*")
                 monthly_metrics_plot = gr.Plot(label="Monthly Metrics")

             # Load monthly metrics when app starts

@@ -656,20 +656,20 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
             )


-        # Submit …
+        # Submit Assistant Tab
-        with gr.Tab("Submit Your …
+        with gr.Tab("Submit Your Assistant"):

-            gr.Markdown("Fill in the details below to add your …
+            gr.Markdown("Fill in the details below to add your assistant to the leaderboard.")

             with gr.Row():
                 with gr.Column():
                     github_input = gr.Textbox(
                         label="GitHub Identifier*",
-                        placeholder="Your …
+                        placeholder="Your assistant username (e.g., claude[bot])"
                     )
                     name_input = gr.Textbox(
-                        label="…
+                        label="Assistant Name*",
-                        placeholder="Your …
+                        placeholder="Your assistant's display name"
                     )

                 with gr.Column():

@@ -679,11 +679,11 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
                     )
                     website_input = gr.Textbox(
                         label="Website*",
-                        placeholder="https://your-…
+                        placeholder="https://your-assistant-website.com"
                     )

                     submit_button = gr.Button(
-                        "Submit …
+                        "Submit Assistant",
                         variant="primary"
                     )
                     submission_status = gr.Textbox(
msr.py CHANGED
@@ -364,7 +364,7 @@ def fetch_all_review_metadata_streaming(conn, identifiers, start_date, end_date)
         end_date: End datetime (timezone-aware)

     Returns:
-        Dictionary mapping …
+        Dictionary mapping assistant identifier to list of review metadata
     """
     identifier_list = ', '.join([f"'{id}'" for id in identifiers])
     metadata_by_agent = defaultdict(list)

@@ -496,7 +496,7 @@ def fetch_all_review_metadata_streaming(conn, identifiers, start_date, end_date)

     # Final summary
     agents_with_data = sum(1 for reviews in metadata_by_agent.values() if reviews)
-    print(f"\n ✓ Complete: {total_reviews} reviews found for {agents_with_data}/{len(identifiers)} …
+    print(f"\n ✓ Complete: {total_reviews} reviews found for {agents_with_data}/{len(identifiers)} assistants")

     return dict(metadata_by_agent)

@@ -559,14 +559,14 @@ def sync_agents_repo():

 def load_agents_from_hf():
     """
-    Load all …
+    Load all assistant metadata JSON files from local git repository.
     ALWAYS syncs with remote first to ensure we have the latest bot data.
     """
     # MANDATORY: Sync with remote first to get latest bot data
-    print(f" Syncing bot_data repository to get latest …
+    print(f" Syncing bot_data repository to get latest assistants...")
     sync_agents_repo() # Will raise exception if sync fails

-    …
+    assistants = []

     # Scan local directory for JSON files
     if not os.path.exists(AGENTS_REPO_LOCAL_PATH):

@@ -574,7 +574,7 @@ def load_agents_from_hf():

     # Walk through the directory to find all JSON files
     files_processed = 0
-    print(f" Loading …
+    print(f" Loading assistant metadata from {AGENTS_REPO_LOCAL_PATH}...")

     for root, dirs, files in os.walk(AGENTS_REPO_LOCAL_PATH):
         # Skip .git directory

@@ -592,7 +592,7 @@ def load_agents_from_hf():
                 with open(file_path, 'r', encoding='utf-8') as f:
                     agent_data = json.load(f)

-                # Only include active …
+                # Only include active assistants
                 if agent_data.get('status') != 'active':
                     continue

@@ -600,14 +600,14 @@ def load_agents_from_hf():
                 github_identifier = filename.replace('.json', '')
                 agent_data['github_identifier'] = github_identifier

-                …
+                assistants.append(agent_data)

             except Exception as e:
                 print(f" ⚠ Error loading {filename}: {str(e)}")
                 continue

-    print(f" ✓ Loaded {len(…
+    print(f" ✓ Loaded {len(assistants)} active assistants (from {files_processed} total files)")
-    return …
+    return assistants


 def get_pr_status_from_metadata(review_meta):
@@ -648,12 +648,12 @@ def calculate_review_stats_from_metadata(metadata_list):
     }


-def calculate_monthly_metrics_by_agent(all_metadata_dict, agents):
+def calculate_monthly_metrics_by_agent(all_metadata_dict, assistants):
-    """Calculate monthly metrics for all …
+    """Calculate monthly metrics for all assistants for visualization."""
-    identifier_to_name = {…
+    identifier_to_name = {assistant.get('github_identifier'): assistant.get('name') for assistant in assistants if assistant.get('github_identifier')}

     if not all_metadata_dict:
-        return {'…
+        return {'assistants': [], 'months': [], 'data': {}}

     agent_month_data = defaultdict(lambda: defaultdict(list))

@@ -712,30 +712,30 @@ def calculate_monthly_metrics_by_agent(all_metadata_dict, agents):
     agents_list = sorted(list(agent_month_data.keys()))

     return {
-        '…
+        'assistants': agents_list,
         'months': months,
         'data': result_data
     }


-def construct_leaderboard_from_metadata(all_metadata_dict, …
+def construct_leaderboard_from_metadata(all_metadata_dict, assistants):
     """Construct leaderboard from in-memory review metadata."""
-    if not …
+    if not assistants:
-        print("Error: No …
+        print("Error: No assistants found")
         return {}

     cache_dict = {}

-    for …
+    for assistant in assistants:
-        identifier = …
+        identifier = assistant.get('github_identifier')
-        agent_name = …
+        agent_name = assistant.get('name', 'Unknown')

         bot_metadata = all_metadata_dict.get(identifier, [])
         stats = calculate_review_stats_from_metadata(bot_metadata)

         cache_dict[identifier] = {
             'name': agent_name,
-            'website': …
+            'website': assistant.get('website', 'N/A'),
             'github_identifier': identifier,
             **stats
         }
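`calculate_review_stats_from_metadata` itself is outside this diff; a sketch of the aggregation it plausibly performs, assuming each review record exposes a PR status (the `pr_status` field name is a guess, suggested by `get_pr_status_from_metadata` above):

```python
def calculate_review_stats_from_metadata(metadata_list):
    """Aggregate per-assistant totals (sketch; the real field names may differ)."""
    total_reviews = len(metadata_list)
    merged_prs = sum(1 for m in metadata_list if m.get('pr_status') == 'merged')
    acceptance_rate = 100.0 * merged_prs / total_reviews if total_reviews else 0.0
    return {
        'total_reviews': total_reviews,
        'merged_prs': merged_prs,
        'acceptance_rate': acceptance_rate,
    }
```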
@@ -789,7 +789,7 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):

 def mine_all_agents():
     """
-    Mine review metadata for all …
+    Mine review metadata for all assistants using STREAMING batch processing.
     Downloads GHArchive data, then uses BATCH-based DuckDB queries.
     """
     print(f"\n[1/4] Downloading GHArchive data...")

@@ -797,19 +797,19 @@ def mine_all_agents():
     if not download_all_gharchive_data():
         print("Warning: Download had errors, continuing with available data...")

-    print(f"\n[2/4] Loading …
+    print(f"\n[2/4] Loading assistant metadata...")

-    …
+    assistants = load_agents_from_hf()
-    if not …
+    if not assistants:
-        print("Error: No …
+        print("Error: No assistants found")
         return

-    identifiers = […
+    identifiers = [assistant['github_identifier'] for assistant in assistants if assistant.get('github_identifier')]
     if not identifiers:
-        print("Error: No valid …
+        print("Error: No valid assistant identifiers found")
         return

-    print(f"\n[3/4] Mining review metadata ({len(identifiers)} …
+    print(f"\n[3/4] Mining review metadata ({len(identifiers)} assistants, {LEADERBOARD_TIME_FRAME_DAYS} days)...")

     try:
         conn = get_duckdb_connection()

@@ -837,8 +837,8 @@ def mine_all_agents():
     print(f"\n[4/4] Saving leaderboard...")

     try:
-        leaderboard_dict = construct_leaderboard_from_metadata(all_metadata, …
+        leaderboard_dict = construct_leaderboard_from_metadata(all_metadata, assistants)
-        monthly_metrics = calculate_monthly_metrics_by_agent(all_metadata, …
+        monthly_metrics = calculate_monthly_metrics_by_agent(all_metadata, assistants)
         save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)

     except Exception as e:
@@ -872,7 +872,7 @@ def setup_scheduler():
         mine_all_agents,
         trigger=trigger,
         id='mine_all_agents',
-        name='Mine GHArchive data for all …
+        name='Mine GHArchive data for all assistants',
         replace_existing=True
     )

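The `setup_scheduler` body is mostly off-screen; a minimal sketch of a daily 12:00 AM UTC job with APScheduler, matching the `id`, `name`, and `replace_existing` arguments visible in the hunk above (the trigger configuration is assumed from the "Daily at 12:00 AM UTC" log line in app.py):

```python
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

def setup_scheduler():
    """Schedule the daily mining job (sketch; only the add_job kwargs come from the diff)."""
    scheduler = BackgroundScheduler(timezone='UTC')
    trigger = CronTrigger(hour=0, minute=0, timezone='UTC')  # daily at 12:00 AM UTC
    scheduler.add_job(
        mine_all_agents,  # defined earlier in msr.py
        trigger=trigger,
        id='mine_all_agents',
        name='Mine GHArchive data for all assistants',
        replace_existing=True,
    )
    scheduler.start()
    return scheduler
```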