import os
import re
from datetime import datetime
from typing import Optional

import requests

# Headers sent with every GitHub REST API request. The token is read from the
# environment at import time, so importing this module raises KeyError when
# GITHUB_AUTH_TOKEN_CRAB is not set.
COMMON_HEADERS = {
    'Accept': 'application/vnd.github+json',
    'Authorization': f'Bearer {os.environ["GITHUB_AUTH_TOKEN_CRAB"]}',
    'X-Github-Api-Version': '2022-11-28',
}

# Matches the "@@ -old_start[,old_count] +new_start[,new_count] @@" prefix of
# a diff hunk. Compiled once because it is applied to every comment.
_HUNK_HEADER_RE = re.compile(r'@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@')


def github_call(url, timeout: float = 30):
    """Send a GET request to ``url`` with the shared GitHub headers.

    Args:
        url: Full URL to request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The ``requests.Response`` object; the status code is not checked here.
    """
    return requests.get(url, headers=COMMON_HEADERS, timeout=timeout)


def _get_all_pages(url: str) -> list[dict]:
    """Fetch a list endpoint, following "next" links until the last page.

    Raises:
        requests.HTTPError: If any page comes back with an error status.
    """
    items: list[dict] = []
    next_url: Optional[str] = url
    while next_url:
        response = github_call(next_url)
        # Fail loudly here: an error payload is a dict, not a list of items,
        # and would otherwise be processed as if it were data.
        response.raise_for_status()
        items.extend(response.json())
        # requests parses the Link response header into ``response.links``;
        # there is no "next" entry on the last page.
        next_url = response.links.get('next', {}).get('url')
    return items


def get_comments(repo_url: str, pr_number: str) -> list[dict]:
    """Return every comment from ``{repo_url}/pulls/{pr_number}/comments``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    return _get_all_pages(f'{repo_url}/pulls/{pr_number}/comments')


def get_commits(repo_url: str, pr_number: str) -> list[dict]:
    """Return every commit from ``{repo_url}/pulls/{pr_number}/commits``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    return _get_all_pages(f'{repo_url}/pulls/{pr_number}/commits')


def parse_date(date: str) -> datetime:
    """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp into a naive ``datetime``.

    Raises:
        ValueError: If ``date`` does not match that format.
    """
    return datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")


def get_first_comment_date(comments: list[dict]) -> datetime:
    """Return the earliest ``created_at`` timestamp among ``comments``.

    Raises:
        ValueError: If ``comments`` is empty.
    """
    return min(parse_date(comment['created_at']) for comment in comments)


def _commit_author_date(commit: dict) -> Optional[str]:
    """Return ``commit['commit']['author']['date']``, or None if any level is missing or null."""
    git_commit = commit.get('commit') or {}
    git_author = git_commit.get('author') or {}
    return git_author.get('date')


def get_useful_commits(commits: list[dict], first_comment_date: datetime) -> list[dict]:
    """Return the commits whose author date is strictly after ``first_comment_date``.

    Commits that carry no author date are skipped rather than raising.
    """
    ret = []
    for commit in commits:
        raw_date = _commit_author_date(commit)
        if raw_date is None:
            # Without a date the commit cannot be placed relative to the
            # first comment, so leave it out.
            continue
        if parse_date(raw_date) > first_comment_date:
            ret.append(commit)
    return ret


def parse_hunk_header(hunk_header: str) -> Optional[dict]:
    """Extracts line ranges from a diff hunk header.

    Args:
        hunk_header: Text starting with ``@@ -a[,b] +c[,d] @@``.

    Returns:
        A dict with ``old_range`` and ``new_range`` entries, each holding an
        inclusive ``start`` and ``end`` line number, or None when the text
        does not start with a hunk header.
    """
    match = _HUNK_HEADER_RE.match(hunk_header)
    if match is None:
        return None

    old_start = int(match.group(1))
    # An omitted count in a hunk header means a single line.
    old_count = int(match.group(2)) if match.group(2) else 1
    new_start = int(match.group(3))
    new_count = int(match.group(4)) if match.group(4) else 1
    return {
        "old_range": {
            "start": old_start,
            "end": old_start + old_count - 1,
        },
        "new_range": {
            "start": new_start,
            "end": new_start + new_count - 1,
        },
    }


def augment_comments(comments: list[dict]) -> list[dict]:
    """Return copies of the comments that have a ``diff_hunk``, adding ``hunk_range``.

    Comments without a ``diff_hunk`` key are dropped. The input dicts are not
    modified. ``hunk_range`` is None when the hunk header cannot be parsed.
    """
    ret = []
    for comment in comments:
        if "diff_hunk" not in comment:
            continue
        # Shallow copy so the caller's dict is left untouched.
        new_comment = comment.copy()
        new_comment["hunk_range"] = parse_hunk_header(comment["diff_hunk"])
        ret.append(new_comment)
    return ret


def process_pull_request(repo_url: str, pr_number: str) -> bool:
    """Print the commits of a pull request made after its first comment.

    Args:
        repo_url: API URL of the repository, e.g.
            ``https://api.github.com/repos/<owner>/<repo>``.
        pr_number: Number of the pull request.

    Returns:
        True if at least one commit after the first comment was found and
        printed, False if there were no usable comments, no commits, or no
        commits after the first comment.

    Raises:
        requests.HTTPError: If a GitHub API request fails.
    """
    comments = augment_comments(get_comments(repo_url, pr_number))
    if not comments:
        # No comments, can't extract triplet
        return False

    first_comment_date = get_first_comment_date(comments)

    commits = get_commits(repo_url, pr_number)
    if not commits:
        # No commits, can't extract triplet
        return False

    # Keep only the commits made after the first comment; earlier commits
    # cannot be a reaction to that comment.
    actual_commits = get_useful_commits(commits, first_comment_date)
    if not actual_commits:
        # No commits after the first comment: there was no revision from the
        # contributor, so no triplet
        return False

    for commit in actual_commits:
        git_author = commit['commit']['author']
        # The top-level "author" is null when the commit is not linked to a
        # GitHub account; fall back to the name recorded in the git commit.
        account = commit.get('author') or {}
        author_login = account.get('login') or git_author.get('name', 'unknown')

        print(f"Commit: {commit['sha']}")
        print(f"Author: {author_login}")
        print(f"Date: {git_author['date']}")
        print(f"Message: {commit['commit']['message']}")
        print("")

    return True


if __name__ == "__main__":
    process_pull_request('https://api.github.com/repos/cdk/cdk', '1140')