From 959184b2a8995ccd158bdc03398febe2e50c8385 Mon Sep 17 00:00:00 2001
From: Karma Riuk
Date: Wed, 7 May 2025 10:38:41 +0200
Subject: [PATCH] we can now clean the dataset from useless entries

---
 dataset.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/dataset.py b/dataset.py
index ca719b9..2222196 100644
--- a/dataset.py
+++ b/dataset.py
@@ -92,7 +92,12 @@ class Dataset:
     def __len__(self) -> int:
         return sum(1 for entry in self.entries if entry.metadata.successful)
 
-    def to_json(self, filename: str, type_: OutputType = OutputType.FULL) -> None:
+    def to_json(
+        self,
+        filename: str,
+        type_: OutputType = OutputType.FULL,
+        remove_non_suggesting: bool = False,
+    ) -> None:
         """Serialize the dataset to a JSON file"""
 
         entries_to_dump = self.entries
@@ -109,6 +114,12 @@ class Dataset:
                 for entry in self.entries
                 if entry.metadata.selection and entry.metadata.selection.diff_after_address_change
             ]
+        elif type_ == OutputType.FULL and remove_non_suggesting:
+            entries_to_dump = [
+                entry
+                for entry in self.entries
+                if entry.metadata.selection and entry.metadata.selection.comment_suggests_change
+            ]
 
         to_dump = Dataset(entries=entries_to_dump)
         print(f"{len(entries_to_dump)} entries...", end=" ", flush=True)
@@ -194,6 +205,11 @@ if __name__ == "__main__":
         default=OutputType.FULL.value,
         help="Type of output to generate",
     )
+    parser.add_argument(
+        "--remove-non-suggesting",
+        action="store_true",
+        help="Applies only when output type is full. When this flag is given, removes the entries that don't suggest change",
+    )
     args = parser.parse_args()
     dataset = Dataset.from_json(args.filename)
 
@@ -206,5 +222,5 @@ if __name__ == "__main__":
         print("Exiting without saving.")
         exit(0)
     print(f"Saving dataset to {args.output},", end=" ", flush=True)
-    dataset.to_json(args.output, OutputType(args.output_type))
+    dataset.to_json(args.output, OutputType(args.output_type), args.remove_non_suggesting)
     print("Done")