now using normal names

Karma Riuk
2025-03-31 15:32:18 +02:00
parent b482c35b90
commit 480dacea3e
2 changed files with 31 additions and 31 deletions

View File

@@ -4,7 +4,7 @@ import json
 # fmt: off
 @dataclass
-class FileData_new:
+class FileData:
     is_code_related: bool
     coverage: Dict[str, float] # jacoco-report -> coverage
     content_before_pr: str = ""
@@ -18,7 +18,7 @@ class Comment:
     to: int
 @dataclass
-class Metadata_new:
+class Metadata:
     repo: str # the name of the repo, with style XXX/YYY
     pr_number: int
     pr_title: str
@@ -31,9 +31,9 @@ class Metadata_new:
 @dataclass
-class DatasetEntry_new:
-    metadata: Metadata_new
-    files: Dict[str, FileData_new] # filename -> file data, files before the PR (before the first PR commits)
+class DatasetEntry:
+    metadata: Metadata
+    files: Dict[str, FileData] # filename -> file data, files before the PR (before the first PR commits)
     diffs_before: Dict[str, str] # filename -> diff, diffs between the opening of the PR and the comment
     comments: List[Comment]
     diffs_after: Dict[str, str] # filename -> diff, changes after the comment
@@ -41,8 +41,8 @@ class DatasetEntry_new:
 # fmt: on
 @dataclass
-class Dataset_new:
-    entries: List[DatasetEntry_new] = field(default_factory=list)
+class Dataset:
+    entries: List[DatasetEntry] = field(default_factory=list)
     def __len__(self) -> int:
         return sum(1 for entry in self.entries if entry.metadata.successful)
@@ -53,7 +53,7 @@ class Dataset_new:
            json.dump(self, f, default=lambda o: o.__dict__, indent=4)
     @staticmethod
-    def from_json(filename: str, keep_still_in_progress: bool = False) -> "Dataset_new":
+    def from_json(filename: str, keep_still_in_progress: bool = False) -> "Dataset":
         with open(filename, "r", encoding="utf-8") as f:
             print(f"Loading dataset from {filename}...", end="")
             data = json.load(f)
@@ -62,7 +62,7 @@ class Dataset_new:
         entries = []
         for entry_data in data["entries"]:
             metadata_data = entry_data["metadata"]
-            metadata = Metadata_new(**metadata_data)
+            metadata = Metadata(**metadata_data)
             if (
                 not keep_still_in_progress
@@ -70,11 +70,11 @@ class Dataset_new:
             ):
                 continue
-            files = {fname: FileData_new(**fdata) for fname, fdata in entry_data["files"].items()}
+            files = {fname: FileData(**fdata) for fname, fdata in entry_data["files"].items()}
             comments = [Comment(**comment) for comment in entry_data["comments"]]
-            entry = DatasetEntry_new(
+            entry = DatasetEntry(
                 metadata=metadata,
                 files=files,
                 diffs_before=entry_data["diffs_before"],
@@ -83,4 +83,4 @@ class Dataset_new:
             )
             entries.append(entry)
-        return Dataset_new(entries=entries)
+        return Dataset(entries=entries)

View File

@@ -12,10 +12,10 @@ from datetime import datetime
 from dataset import (
     Comment,
-    Dataset_new,
-    DatasetEntry_new,
-    FileData_new,
-    Metadata_new,
+    Dataset,
+    DatasetEntry,
+    FileData,
+    Metadata,
 )
 from errors import (
     CantCheckoutCommitError,
@@ -137,7 +137,7 @@ def try_read_file(fname: str) -> str:
         return "File listed in PR is a directory (likely a submodule), to be ignored"
-def get_files(pr: PullRequest, repo: Repository, repo_path: str) -> dict[str, FileData_new]:
+def get_files(pr: PullRequest, repo: Repository, repo_path: str) -> dict[str, FileData]:
     ret = {}
     for file in pr.get_files():
         try:
@@ -167,7 +167,7 @@ def get_files(pr: PullRequest, repo: Repository, repo_path: str) -> dict[str, Fi
         checkout(repo_path, pr)
         contents_after = try_read_file(os.path.join(repo_path, file.filename))
-        ret[file.filename] = FileData_new(
+        ret[file.filename] = FileData(
             is_code_related=file.filename.endswith('.java'),
             coverage={},
             content_before_pr=contents_before,
@@ -196,16 +196,16 @@ def get_comments(pr: PullRequest) -> list[Comment]:
 def process_pull(
     repo: Repository,
     pr: PullRequest,
-    dataset: Dataset_new,
+    dataset: Dataset,
     repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry_new]] = {},
+    cache: dict[str, dict[int, DatasetEntry]] = {},
 ):
     if pr.number in cache.get(repo.full_name, set()):
         dataset.entries.append(cache[repo.full_name][pr.number])
         return
-    entry = DatasetEntry_new(
-        metadata=Metadata_new(
+    entry = DatasetEntry(
+        metadata=Metadata(
             repo.full_name,
             pr.number,
             pr.title,
@@ -321,9 +321,9 @@ def process_pull(
 def process_repo(
     repo_name: str,
-    dataset: Dataset_new,
+    dataset: Dataset,
     repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry_new]] = {},
+    cache: dict[str, dict[int, DatasetEntry]] = {},
 ):
     repo = g.get_repo(repo_name)
     if repo.full_name in cache:
@@ -348,9 +348,9 @@ def process_repo(
 def process_repos(
     df: pd.DataFrame,
-    dataset: Dataset_new,
+    dataset: Dataset,
     repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry_new]] = {},
+    cache: dict[str, dict[int, DatasetEntry]] = {},
 ):
     """
     Processes the repos in the given csv file, extracting the good ones and
@@ -378,9 +378,9 @@ def process_repos(
 def only_inject_jacoco(
-    dataset: Dataset_new,
+    dataset: Dataset,
     repos_dir: str,
-    cache: dict[str, dict[int, DatasetEntry_new]] = {},
+    cache: dict[str, dict[int, DatasetEntry]] = {},
 ):
     n_successfull_injections = 0
     n_tried_injections = 0
@@ -468,13 +468,13 @@ if __name__ == "__main__":
     if args.only_repo is not None:
         df = df.loc[df["name"] == args.only_repo]
-    cache: dict[str, dict[int, DatasetEntry_new]] = defaultdict(dict)
+    cache: dict[str, dict[int, DatasetEntry]] = defaultdict(dict)
     if args.cache is not None:
-        cache_dataset = Dataset_new.from_json(args.cache)
+        cache_dataset = Dataset.from_json(args.cache)
         for cache_entry in cache_dataset.entries:
             cache[cache_entry.metadata.repo][cache_entry.metadata.pr_number] = cache_entry
-    dataset = Dataset_new()
+    dataset = Dataset()
     try:
         if args.only_inject_jacoco:
             only_inject_jacoco(dataset, args.repos, cache)