mirror of
https://github.com/karma-riuk/crab.git
synced 2025-07-05 05:28:13 +02:00
removed useless parameter
This commit is contained in:
@ -11,19 +11,17 @@ from dataset import Dataset, DatasetEntry, FileData, Metadata, Diff
|
|||||||
from utils import has_only_1_comment
|
from utils import has_only_1_comment
|
||||||
|
|
||||||
|
|
||||||
def get_good_projects(csv_file: str, verbose: bool = False) -> pd.DataFrame:
|
def get_good_projects(csv_file: str) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
Extracts the good (the ones that compile and test successfully, and that
|
Extracts the good (the ones that compile and test successfully, and that
|
||||||
have at least one test) from the given file.
|
have at least one test) from the given file.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
csv_file (str): The csv file containing the projects.
|
csv_file (str): The csv file containing the projects.
|
||||||
verbose (bool): Whether to print the number of good projects.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
pd.DataFrame: The good projects.
|
pd.DataFrame: The good projects.
|
||||||
"""
|
"""
|
||||||
if verbose: print(f"Reading {csv_file}...")
|
|
||||||
df = pd.read_csv(csv_file)
|
df = pd.read_csv(csv_file)
|
||||||
return df.loc[(df['good_repo_for_crab'] == True) & (df['n_tests'] > 0)]
|
return df.loc[(df['good_repo_for_crab'] == True) & (df['n_tests'] > 0)]
|
||||||
|
|
||||||
@ -83,7 +81,7 @@ def process_pull(repo: Repository, pr: PullRequest, dataset: Dataset):
|
|||||||
diffs_after=diffs_after,
|
diffs_after=diffs_after,
|
||||||
))
|
))
|
||||||
|
|
||||||
def process_repo(repo_name: str, stats_df: Optional[pd.DataFrame], dataset: Dataset, verbose: bool = False):
|
def process_repo(repo_name: str, stats_df: Optional[pd.DataFrame], dataset: Dataset):
|
||||||
good_prs = []
|
good_prs = []
|
||||||
repo = g.get_repo(repo_name)
|
repo = g.get_repo(repo_name)
|
||||||
good_prs = get_good_prs(repo, stats_df)
|
good_prs = get_good_prs(repo, stats_df)
|
||||||
@ -91,7 +89,7 @@ def process_repo(repo_name: str, stats_df: Optional[pd.DataFrame], dataset: Data
|
|||||||
for pr in tqdm(good_prs, desc="Processing good prs", leave=False):
|
for pr in tqdm(good_prs, desc="Processing good prs", leave=False):
|
||||||
process_pull(repo, pr, dataset)
|
process_pull(repo, pr, dataset)
|
||||||
|
|
||||||
def process_repos(csv_file: str, stats_csv: Optional[str], dataset: Dataset, verbose: bool = False):
|
def process_repos(csv_file: str, stats_csv: Optional[str], dataset: Dataset):
|
||||||
"""
|
"""
|
||||||
Processes the repos in the given csv file, extracting the good ones and
|
Processes the repos in the given csv file, extracting the good ones and
|
||||||
creating the "triplets" for the dataset.
|
creating the "triplets" for the dataset.
|
||||||
@ -102,7 +100,7 @@ def process_repos(csv_file: str, stats_csv: Optional[str], dataset: Dataset, ver
|
|||||||
Passing it by reference in order to have the latest information, in case of an error
|
Passing it by reference in order to have the latest information, in case of an error
|
||||||
verbose (bool): Whether to be verbose or not
|
verbose (bool): Whether to be verbose or not
|
||||||
"""
|
"""
|
||||||
df = get_good_projects(csv_file, verbose=verbose)
|
df = get_good_projects(csv_file)
|
||||||
stats_df = pd.read_csv(stats_csv) if stats_csv is not None else None
|
stats_df = pd.read_csv(stats_csv) if stats_csv is not None else None
|
||||||
already_processed_repos = []
|
already_processed_repos = []
|
||||||
potentially_good_repos = []
|
potentially_good_repos = []
|
||||||
@ -131,7 +129,7 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument('csv_file', type=str, help='The csv file containing the projects (the results from clone_repos.py).')
|
parser.add_argument('csv_file', type=str, help='The csv file containing the projects (the results from clone_repos.py).')
|
||||||
parser.add_argument('-o', '--output', type=str, default="./dataset.json", help='The file in which the dataset will be contained. Default is "./dataset.json"')
|
parser.add_argument('-o', '--output', type=str, default="./dataset.json", help='The file in which the dataset will be contained. Default is "./dataset.json"')
|
||||||
parser.add_argument('-s', '--stats', type=str, help="The name of the output file from the stats_pull_requests.py. The stats file already knows which PRs are good (the ones with only 1 comment between two rounds of commits), so instead of going through all of PRs of a repo, we can fast-track using this. If the repo isn't in the stats file, we must go through each PR")
|
parser.add_argument('-s', '--stats', type=str, help="The name of the output file from the stats_pull_requests.py. The stats file already knows which PRs are good (the ones with only 1 comment between two rounds of commits), so instead of going through all of PRs of a repo, we can fast-track using this. If the repo isn't in the stats file, we must go through each PR")
|
||||||
parser.add_argument('-v', '--verbose', action='store_true', help='Prints the number of good projects.')
|
# parser.add_argument('-v', '--verbose', action='store_true', help='Prints the number of good projects.')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
g = Github(os.environ["GITHUB_AUTH_TOKEN_CRAB"])
|
g = Github(os.environ["GITHUB_AUTH_TOKEN_CRAB"])
|
||||||
@ -139,6 +137,6 @@ if __name__ == "__main__":
|
|||||||
dataset = Dataset()
|
dataset = Dataset()
|
||||||
try:
|
try:
|
||||||
# try and finally to save, regardless of an error occurring or the program finished correctly
|
# try and finally to save, regardless of an error occurring or the program finished correctly
|
||||||
process_repos(args.csv_file, args.stats, dataset, verbose=args.verbose)
|
process_repos(args.csv_file, args.stats, dataset)
|
||||||
finally:
|
finally:
|
||||||
dataset.to_json(args.output)
|
dataset.to_json(args.output)
|
||||||
|
Reference in New Issue
Block a user