formatted file

This commit is contained in:
Karma Riuk
2025-05-21 09:18:33 +02:00
parent d48c5d04b8
commit a8ccf081a2

View File

@@ -3,17 +3,18 @@ from datetime import datetime
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
from github import Github from github import Github
from utils import has_only_1_round_of_comments, has_only_1_comment, move_github_logging_to_file from utils import has_only_1_round_of_comments, has_only_1_comment, move_logger_to_file
tqdm.pandas() tqdm.pandas()
# Initialize GitHub API client # Initialize GitHub API client
g = Github(os.environ["GITHUB_AUTH_TOKEN_CRAB"]) g = Github(os.environ["GITHUB_AUTH_TOKEN_CRAB"])
def process_pull(repo, pull): def process_pull(repo, pull):
commits = pull.get_commits() commits = pull.get_commits()
comments = pull.get_review_comments() comments = pull.get_review_comments()
return { return {
"repo": repo.full_name, "repo": repo.full_name,
"pr_number": pull.number, "pr_number": pull.number,
@@ -24,24 +25,26 @@ def process_pull(repo, pull):
"has_only_1_comment": has_only_1_comment(commits, comments), "has_only_1_comment": has_only_1_comment(commits, comments),
} }
def process_repo(repo_name):
    """Gather one stats record for every merged pull request of a repository.

    Args:
        repo_name: Repository identifier passed to ``g.get_repo``; it is also
            used as the label of the per-repository progress bar.

    Returns:
        A list with the result of ``process_pull`` for each closed pull
        request whose ``merged_at`` is truthy, in the order the pull
        requests were returned by ``get_pulls``.
    """
    repository = g.get_repo(repo_name)
    collected = []

    # Fetch every closed PR up front and hand the resulting list to tqdm.
    closed_pulls = list(repository.get_pulls(state="closed"))

    with tqdm(closed_pulls, desc=repo_name, leave=False) as progress:
        for pr in progress:
            # Show when work on the current PR began, whether or not it ends
            # up being processed.
            timestamp = datetime.now().strftime("%d/%m, %H:%M:%S")
            progress.set_postfix({"started at": timestamp})

            # Closed-but-unmerged PRs contribute nothing to the stats.
            if pr.merged_at:
                collected.append(process_pull(repository, pr))

    return collected
def main(): def main():
repos = pd.read_csv("results.csv") repos = pd.read_csv("results.csv")
repos = repos[(repos["good_repo_for_crab"] == True) & (repos["n_tests"] > 0)] repos = repos[(repos["good_repo_for_crab"] == True) & (repos["n_tests"] > 0)]
stats = [] stats = []
try: try:
for _, row in tqdm(repos.iterrows(), total=len(repos)): for _, row in tqdm(repos.iterrows(), total=len(repos)):
if "name" not in row or not isinstance(row["name"], str): if "name" not in row or not isinstance(row["name"], str):
@@ -51,6 +54,7 @@ def main():
finally: finally:
pd.DataFrame(stats).to_csv("pr_stats.csv", index=False) pd.DataFrame(stats).to_csv("pr_stats.csv", index=False)
if __name__ == "__main__": if __name__ == "__main__":
move_github_logging_to_file() move_logger_to_file()
main() main()