zhimin-z committed on
Commit
767a7c9
·
1 Parent(s): bc2c415
Files changed (1) hide show
  1. msr.py +22 -6
msr.py CHANGED
@@ -402,28 +402,44 @@ def fetch_all_review_metadata_streaming(conn, identifiers, start_date, end_date)
402
  query = f"""
403
  WITH review_events AS (
404
  SELECT
405
- payload.pull_request.html_url as pr_url,
 
 
 
 
406
  actor.login as reviewer,
407
  COALESCE(payload.review.submitted_at, created_at) as reviewed_at
408
  FROM read_json({file_patterns_sql}, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
409
  WHERE
410
  type = 'PullRequestReviewEvent'
411
- AND payload.pull_request.html_url IS NOT NULL
412
  AND actor.login IN ({identifier_list})
413
  ),
414
  pr_status AS (
415
  SELECT
416
- payload.pull_request.html_url as pr_url,
 
 
 
 
417
  payload.pull_request.merged as is_merged,
418
  payload.pull_request.merged_at as merged_at,
419
  payload.pull_request.closed_at as closed_at,
420
- ROW_NUMBER() OVER (PARTITION BY payload.pull_request.html_url ORDER BY created_at DESC) as rn
 
 
 
 
421
  FROM read_json({file_patterns_sql}, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
422
  WHERE
423
  type = 'PullRequestEvent'
424
  AND payload.action = 'closed'
425
- AND payload.pull_request.html_url IS NOT NULL
426
- AND payload.pull_request.html_url IN (SELECT DISTINCT pr_url FROM review_events)
 
 
 
 
427
  )
428
  SELECT
429
  re.reviewer,
 
402
  query = f"""
403
  WITH review_events AS (
404
  SELECT
405
+ CONCAT(
406
+ REPLACE(repo.url, 'api.github.com/repos/', 'github.com/'),
407
+ '/pull/',
408
+ CAST(payload.pull_request.number AS VARCHAR)
409
+ ) as pr_url,
410
  actor.login as reviewer,
411
  COALESCE(payload.review.submitted_at, created_at) as reviewed_at
412
  FROM read_json({file_patterns_sql}, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
413
  WHERE
414
  type = 'PullRequestReviewEvent'
415
+ AND payload.pull_request.number IS NOT NULL
416
  AND actor.login IN ({identifier_list})
417
  ),
418
  pr_status AS (
419
  SELECT
420
+ CONCAT(
421
+ REPLACE(repo.url, 'api.github.com/repos/', 'github.com/'),
422
+ '/pull/',
423
+ CAST(payload.pull_request.number AS VARCHAR)
424
+ ) as pr_url,
425
  payload.pull_request.merged as is_merged,
426
  payload.pull_request.merged_at as merged_at,
427
  payload.pull_request.closed_at as closed_at,
428
+ ROW_NUMBER() OVER (PARTITION BY CONCAT(
429
+ REPLACE(repo.url, 'api.github.com/repos/', 'github.com/'),
430
+ '/pull/',
431
+ CAST(payload.pull_request.number AS VARCHAR)
432
+ ) ORDER BY created_at DESC) as rn
433
  FROM read_json({file_patterns_sql}, union_by_name=true, filename=true, compression='gzip', format='newline_delimited', ignore_errors=true, maximum_object_size=2147483648)
434
  WHERE
435
  type = 'PullRequestEvent'
436
  AND payload.action = 'closed'
437
+ AND payload.pull_request.number IS NOT NULL
438
+ AND CONCAT(
439
+ REPLACE(repo.url, 'api.github.com/repos/', 'github.com/'),
440
+ '/pull/',
441
+ CAST(payload.pull_request.number AS VARCHAR)
442
+ ) IN (SELECT DISTINCT pr_url FROM review_events)
443
  )
444
  SELECT
445
  re.reviewer,