We can now clean useless entries out of the dataset

This commit is contained in:
Karma Riuk
2025-05-07 10:38:41 +02:00
parent 36b7dc5c02
commit 959184b2a8

View File

@ -92,7 +92,12 @@ class Dataset:
def __len__(self) -> int: def __len__(self) -> int:
return sum(1 for entry in self.entries if entry.metadata.successful) return sum(1 for entry in self.entries if entry.metadata.successful)
def to_json(self, filename: str, type_: OutputType = OutputType.FULL) -> None: def to_json(
self,
filename: str,
type_: OutputType = OutputType.FULL,
remove_non_suggesting: bool = False,
) -> None:
"""Serialize the dataset to a JSON file""" """Serialize the dataset to a JSON file"""
entries_to_dump = self.entries entries_to_dump = self.entries
@ -109,6 +114,12 @@ class Dataset:
for entry in self.entries for entry in self.entries
if entry.metadata.selection and entry.metadata.selection.diff_after_address_change if entry.metadata.selection and entry.metadata.selection.diff_after_address_change
] ]
elif type_ == OutputType.FULL and remove_non_suggesting:
entries_to_dump = [
entry
for entry in self.entries
if entry.metadata.selection and entry.metadata.selection.comment_suggests_change
]
to_dump = Dataset(entries=entries_to_dump) to_dump = Dataset(entries=entries_to_dump)
print(f"{len(entries_to_dump)} entries...", end=" ", flush=True) print(f"{len(entries_to_dump)} entries...", end=" ", flush=True)
@ -194,6 +205,11 @@ if __name__ == "__main__":
default=OutputType.FULL.value, default=OutputType.FULL.value,
help="Type of output to generate", help="Type of output to generate",
) )
parser.add_argument(
"--remove-non-suggesting",
action="store_true",
help="Applies only when output type is full. When this flag is given, removes the entries that don't suggest change",
)
args = parser.parse_args() args = parser.parse_args()
dataset = Dataset.from_json(args.filename) dataset = Dataset.from_json(args.filename)
@ -206,5 +222,5 @@ if __name__ == "__main__":
print("Exiting without saving.") print("Exiting without saving.")
exit(0) exit(0)
print(f"Saving dataset to {args.output},", end=" ", flush=True) print(f"Saving dataset to {args.output},", end=" ", flush=True)
dataset.to_json(args.output, OutputType(args.output_type)) dataset.to_json(args.output, OutputType(args.output_type), args.remove_non_suggesting)
print("Done") print("Done")