zhimin-z committed
Commit 0118014 · 1 Parent(s): 73d682a
Files changed (3):
  1. Dockerfile +0 -7
  2. docker-compose.yml +2 -4
  3. msr.py +26 -122
Dockerfile CHANGED
@@ -9,16 +9,9 @@ RUN apt-get update && apt-get install -y \
     g++ \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy requirements first to leverage Docker cache
-COPY requirements.txt .
-
 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy application code
-COPY msr.py .
-COPY .env .env
-
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
 
docker-compose.yml CHANGED
@@ -10,12 +10,10 @@ services:
     env_file:
       - .env
     volumes:
+      # Mount entire workspace for live code updates
+      - .:/app
       # Mount gharchive data directory
       - ../gharchive/data:/gharchive/data:ro
-      # Persist DuckDB cache
-      - ./gharchive_cache.duckdb:/app/gharchive_cache.duckdb
-      # Persist logs
-      - ./logs:/app/logs
     environment:
       - PYTHONUNBUFFERED=1
     logging:
msr.py CHANGED
@@ -137,7 +137,6 @@ def download_file(url):
 
     # Skip if json.gz already exists
     if os.path.exists(filepath):
-        print(f" ✓ {filename} (already exists)")
         return True
 
     # Download with retry logic
@@ -147,7 +146,6 @@ def download_file(url):
             response.raise_for_status()
             with open(filepath, "wb") as f:
                 f.write(response.content)
-            print(f" ✓ {filename} (downloaded)")
             return True
 
         except requests.exceptions.HTTPError as e:
@@ -194,10 +192,6 @@ def download_all_gharchive_data():
     Returns:
         bool: True if all downloads completed (some may have failed), False if critical error
     """
-    print(f"\n{'='*80}")
-    print(f"DOWNLOADING GHARCHIVE DATA")
-    print(f"{'='*80}")
-
     # Create data directory if it doesn't exist
     os.makedirs(GHARCHIVE_DATA_DIR, exist_ok=True)
 
@@ -215,11 +209,6 @@ def download_all_gharchive_data():
         urls.append(url)
         current_date += timedelta(days=1)
 
-    print(f"Downloading {len(urls)} files ({len(urls)//24} days × 24 hours)")
-    print(f"Workers: {DOWNLOAD_WORKERS}")
-    print(f"Target directory: {GHARCHIVE_DATA_DIR}")
-    print(f"{'='*80}\n")
-
     downloads_processed = 0
 
     try:
@@ -230,18 +219,12 @@ def download_all_gharchive_data():
             # Wait for downloads to complete
             for future in as_completed(futures):
                 downloads_processed += 1
-                if downloads_processed % 100 == 0:
-                    print(f" Progress: {downloads_processed}/{len(urls)} files processed ({downloads_processed*100//len(urls)}%)")
 
-        print(f"\n{'='*80}")
-        print(f"Download complete: {downloads_processed}/{len(urls)} files processed")
-        print(f"{'='*80}\n")
+        print(f"Download complete: {downloads_processed} files")
         return True
 
     except Exception as e:
-        print(f"\n{'='*80}")
         print(f"Error during download: {str(e)}")
-        print(f"{'='*80}\n")
         import traceback
         traceback.print_exc()
         return False
@@ -435,9 +418,6 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
         ...
     }
     """
-    print(f"Querying DuckDB for ALL {len(identifiers)} agents in ONE QUERY")
-    print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
-
     # Generate file path patterns for review period
     review_patterns = generate_file_path_patterns(start_date, end_date)
 
@@ -530,13 +510,6 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
         ORDER BY re.reviewer, re.reviewed_at DESC
     """
 
-    # Calculate number of days for reporting
-    review_days = (end_date - start_date).days
-    status_days = (end_date - status_start_date).days
-
-    print(f" Querying {review_days} days for reviews, {status_days} days for PR status...")
-    print(f" Agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}")
-
     try:
         # Create cache table name based on date range
         cache_table_name = f"pr_cache_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}"
@@ -548,14 +521,12 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
         """).fetchone()[0] > 0
 
         if cache_exists:
-            print(f" Using cached results from table {cache_table_name}")
             results = conn.execute(f"""
                 SELECT reviewer, url, reviewed_at, merged_at, closed_at
                 FROM {cache_table_name}
                 WHERE reviewer IN ({identifier_list})
             """).fetchall()
         else:
-            print(f" Cache miss - executing full query and caching to {cache_table_name}")
             # Execute query with parameters
             results = conn.execute(query, {'review_patterns': review_patterns, 'status_patterns': status_patterns}).fetchall()
 
@@ -575,9 +546,6 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
                 [r[3] for r in results],
                 [r[4] for r in results]
             ])
-            print(f" Cached {len(results)} results to {cache_table_name}")
-
-        print(f" Found {len(results)} total PR review records across all agents")
 
         # Group results by agent
         metadata_by_agent = defaultdict(list)
@@ -596,22 +564,11 @@ def fetch_all_pr_metadata_single_query(conn, identifiers, start_date, end_date):
                 'closed_at': closed_at,
             })
 
-        # Print breakdown by agent
-        print(f"Results breakdown by agent:")
-        for identifier in identifiers:
-            count = len(metadata_by_agent.get(identifier, []))
-            if count > 0:
-                metadata = metadata_by_agent[identifier]
-                merged_count = sum(1 for m in metadata if m['merged_at'] is not None)
-                closed_count = sum(1 for m in metadata if m['closed_at'] is not None and m['merged_at'] is None)
-                open_count = count - merged_count - closed_count
-                print(f" {identifier}: {count} PRs ({merged_count} merged, {closed_count} closed, {open_count} open)")
-
         # Convert defaultdict to regular dict
         return dict(metadata_by_agent)
 
     except Exception as e:
-        print(f" DuckDB error: {str(e)}")
+        print(f"DuckDB error: {str(e)}")
         import traceback
         traceback.print_exc()
         return {}
@@ -710,23 +667,17 @@ def batch_upload_review_metadata(all_metadata):
         grouped = group_metadata_by_date(metadata_list)
         total_files += len(grouped)
 
-    print(f"\n{'='*80}")
-    print(f"Starting batch upload: {len(all_metadata)} agents, {total_files} total files")
-    print(f"Upload delay: {UPLOAD_DELAY_SECONDS}s between files")
-    print(f"{'='*80}\n")
+    print(f"Uploading {total_files} files for {len(all_metadata)} agents...")
 
     file_count = 0
 
     for agent_idx, (agent_identifier, metadata_list) in enumerate(all_metadata.items(), 1):
         if not metadata_list:
-            print(f"[{agent_idx}/{len(all_metadata)}] Skipping {agent_identifier} (no data)")
             continue
 
         # Group by date
         grouped = group_metadata_by_date(metadata_list)
 
-        print(f"[{agent_idx}/{len(all_metadata)}] Uploading {len(grouped)} files for {agent_identifier}...")
-
         # Create temporary files for this agent
         agent_temp_dir = tempfile.mkdtemp()
 
@@ -752,8 +703,6 @@ def batch_upload_review_metadata(all_metadata):
             for file_idx, (local_path, repo_path, review_count) in enumerate(local_files, 1):
                 file_count += 1
 
-                print(f" [{file_count}/{total_files}] Uploading {repo_path} ({review_count} reviews)...", end='')
-
                 if upload_single_file_with_retry(
                     api=api,
                     local_path=local_path,
@@ -773,20 +722,16 @@ def batch_upload_review_metadata(all_metadata):
                 if file_idx < len(local_files):
                     time.sleep(UPLOAD_DELAY_SECONDS)
 
-            print(f" Agent {agent_identifier}: {agent_success} uploaded, {agent_error} errors\n")
-
         finally:
             # Clean up temp directory
             if os.path.exists(agent_temp_dir):
                 import shutil
                 shutil.rmtree(agent_temp_dir)
 
-    print(f"\n{'='*80}")
-    print(f"Batch upload complete!")
-    print(f" Total files: {total_files}")
-    print(f" Successful: {success_count}")
-    print(f" Errors: {error_count}")
-    print(f"{'='*80}\n")
+    if error_count > 0:
+        print(f"Upload complete: {success_count}/{total_files} succeeded, {error_count} errors")
+    else:
+        print(f"Upload complete: {success_count}/{total_files} files")
 
     return success_count, error_count
 
@@ -813,8 +758,6 @@ def load_agents_from_hf():
         # Filter for JSON files only
         json_files = [f for f in files if f.endswith('.json')]
 
-        print(f"Found {len(json_files)} agent files in {AGENTS_REPO}")
-
         # Download and parse each JSON file
         for json_file in json_files:
             try:
@@ -838,10 +781,9 @@ def load_agents_from_hf():
                 agents.append(agent_data)
 
             except Exception as e:
-                print(f"Warning: Could not load {json_file}: {str(e)}")
+                print(f"Error loading {json_file}: {str(e)}")
                 continue
 
-        print(f"Loaded {len(agents)} agents from HuggingFace")
        return agents
 
     except Exception as e:
@@ -1010,14 +952,10 @@ def construct_leaderboard_from_metadata(all_metadata_dict, agents):
     Returns:
         Dictionary of agent stats.
     """
-    print("Constructing leaderboard from review metadata...")
-
     if not agents:
-        print("No agents found")
+        print("Error: No agents found")
         return {}
 
-    print(f"Processing {len(agents)} agents")
-
     cache_dict = {}
 
     for agent in agents:
@@ -1037,8 +975,6 @@ def construct_leaderboard_from_metadata(all_metadata_dict, agents):
             **stats
         }
 
-    print(f"Constructed cache with {len(cache_dict)} agent entries")
-
     return cache_dict
 
 
@@ -1077,7 +1013,6 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
 
     try:
         # Upload to HuggingFace with retry logic
-        print(f"Uploading leaderboard data...", end='')
         upload_file_with_backoff(
             api=api,
             path_or_fileobj=filename,
@@ -1085,7 +1020,6 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
             repo_id=LEADERBOARD_REPO,
             repo_type="dataset"
         )
-        print(f"Saved leaderboard data to HuggingFace: {filename}")
         return True
     finally:
         # Always clean up local file
@@ -1109,35 +1043,26 @@ def mine_all_agents():
     Downloads GHArchive data first, then uses ONE DuckDB query for ALL agents, then batch uploads with time gaps.
     """
     # Step 1: Download GHArchive data
-    print(f"\n{'='*80}")
-    print(f"STEP 1: DOWNLOADING GHARCHIVE DATA")
-    print(f"{'='*80}\n")
+    print(f"\n[1/5] Downloading GHArchive data...")
 
     if not download_all_gharchive_data():
-        print("Warning: Download had errors, but continuing with available data...")
+        print("Warning: Download had errors, continuing with available data...")
 
     # Step 2: Load agent metadata from HuggingFace
-    print(f"\n{'='*80}")
-    print(f"STEP 2: LOADING AGENT METADATA")
-    print(f"{'='*80}\n")
+    print(f"\n[2/5] Loading agent metadata...")
 
     agents = load_agents_from_hf()
     if not agents:
-        print("No agents found in HuggingFace dataset")
+        print("Error: No agents found")
        return
 
     # Extract all identifiers
     identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')]
     if not identifiers:
-        print("No valid agent identifiers found")
+        print("Error: No valid agent identifiers found")
        return
 
-    print(f"\n{'='*80}")
-    print(f"STEP 3: MINING REVIEW METADATA")
-    print(f"{'='*80}")
-    print(f"Agents: {len(identifiers)}")
-    print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days")
-    print(f"{'='*80}")
+    print(f"\n[3/5] Mining review metadata ({len(identifiers)} agents, {LEADERBOARD_TIME_FRAME_DAYS} days)...")
 
     # Initialize DuckDB connection
     try:
@@ -1161,12 +1086,7 @@ def mine_all_agents():
         total_prs = sum(len(metadata_list) for metadata_list in all_metadata.values())
         agents_with_data = sum(1 for metadata_list in all_metadata.values() if metadata_list)
 
-        print(f"\n{'='*80}")
-        print(f"DuckDB query complete!")
-        print(f" Total agents: {len(agents)}")
-        print(f" Agents with data: {agents_with_data}")
-        print(f" Total PRs found: {total_prs}")
-        print(f"{'='*80}")
+        print(f"Query complete: {total_prs} PRs found for {agents_with_data}/{len(agents)} agents")
 
     except Exception as e:
         print(f"Error during DuckDB fetch: {str(e)}")
@@ -1178,39 +1098,27 @@ def mine_all_agents():
         conn.close()
 
     # Step 4: Batch upload review metadata with time gaps
-    print(f"\n{'='*80}")
-    print(f"STEP 4: UPLOADING REVIEW METADATA")
-    print(f"{'='*80}\n")
+    print(f"\n[4/5] Uploading review metadata...")
 
     success_count, error_count = batch_upload_review_metadata(all_metadata)
 
     # Step 5: Construct and save leaderboard data
-    print(f"\n{'='*80}")
-    print(f"STEP 5: CONSTRUCTING AND SAVING LEADERBOARD")
-    print(f"{'='*80}\n")
+    print(f"\n[5/5] Saving leaderboard...")
 
     try:
         # Construct leaderboard from in-memory data
         leaderboard_dict = construct_leaderboard_from_metadata(all_metadata, agents)
 
         # Calculate monthly metrics from in-memory data
-        print(f"Calculating monthly metrics...")
         monthly_metrics = calculate_monthly_metrics_by_agent(all_metadata, agents)
 
         # Save to HuggingFace
-        print(f"Saving leaderboard data to HuggingFace...")
         save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
 
-        print(f"\n{'='*80}")
-        print(f"ALL TASKS COMPLETE!")
-        print(f" Review metadata: {success_count} files uploaded, {error_count} errors")
-        print(f" Leaderboard entries: {len(leaderboard_dict)}")
-        print(f" Monthly data points: {len(monthly_metrics.get('months', []))} months")
-        print(f" Saved to: {LEADERBOARD_REPO}/swe-review.json")
-        print(f"{'='*80}")
+        print(f"\nCOMPLETE: {success_count} files uploaded" + (f", {error_count} errors" if error_count > 0 else ""))
 
     except Exception as e:
-        print(f"Failed to construct/save leaderboard data: {str(e)}")
+        print(f"Error saving leaderboard: {str(e)}")
         import traceback
         traceback.print_exc()
 
@@ -1258,21 +1166,17 @@ def setup_scheduler():
     )
 
     # Print schedule information
-    print(f"{'='*80}")
-    print(f"SCHEDULER CONFIGURED")
-    print(f"{'='*80}")
-    print(f"Schedule: Monthly on day {SCHEDULE_DAY_OF_MONTH} at {SCHEDULE_HOUR:02d}:{SCHEDULE_MINUTE:02d} {SCHEDULE_TIMEZONE}")
-    print(f"Next run: {scheduler.get_jobs()[0].next_run_time}")
-    print(f"{'='*80}\n")
+    from datetime import datetime
+    next_run = trigger.get_next_fire_time(None, datetime.now(trigger.timezone))
+    print(f"Scheduler: Monthly on day {SCHEDULE_DAY_OF_MONTH} at {SCHEDULE_HOUR:02d}:{SCHEDULE_MINUTE:02d} {SCHEDULE_TIMEZONE}")
+    print(f"Next run: {next_run}\n")
 
     # Run immediately on startup
-    print("Running initial mining job on startup...")
+    print("Running initial mining job...")
     mine_all_agents()
 
     # Start scheduler (blocking call)
-    print(f"\n{'='*80}")
-    print("Starting scheduler... (Press Ctrl+C to exit)")
-    print(f"{'='*80}\n")
+    print(f"\nScheduler started (Press Ctrl+C to exit)")
 
     try:
         scheduler.start()