mirror of
https://github.com/karma-riuk/crab.git
synced 2025-07-05 05:28:13 +02:00
We can now remove useless entries from the dataset
This commit is contained in:
20
dataset.py
20
dataset.py
@ -92,7 +92,12 @@ class Dataset:
|
|||||||
def __len__(self) -> int:
|
def __len__(self) -> int:
|
||||||
return sum(1 for entry in self.entries if entry.metadata.successful)
|
return sum(1 for entry in self.entries if entry.metadata.successful)
|
||||||
|
|
||||||
def to_json(self, filename: str, type_: OutputType = OutputType.FULL) -> None:
|
def to_json(
|
||||||
|
self,
|
||||||
|
filename: str,
|
||||||
|
type_: OutputType = OutputType.FULL,
|
||||||
|
remove_non_suggesting: bool = False,
|
||||||
|
) -> None:
|
||||||
"""Serialize the dataset to a JSON file"""
|
"""Serialize the dataset to a JSON file"""
|
||||||
|
|
||||||
entries_to_dump = self.entries
|
entries_to_dump = self.entries
|
||||||
@ -109,6 +114,12 @@ class Dataset:
|
|||||||
for entry in self.entries
|
for entry in self.entries
|
||||||
if entry.metadata.selection and entry.metadata.selection.diff_after_address_change
|
if entry.metadata.selection and entry.metadata.selection.diff_after_address_change
|
||||||
]
|
]
|
||||||
|
elif type_ == OutputType.FULL and remove_non_suggesting:
|
||||||
|
entries_to_dump = [
|
||||||
|
entry
|
||||||
|
for entry in self.entries
|
||||||
|
if entry.metadata.selection and entry.metadata.selection.comment_suggests_change
|
||||||
|
]
|
||||||
|
|
||||||
to_dump = Dataset(entries=entries_to_dump)
|
to_dump = Dataset(entries=entries_to_dump)
|
||||||
print(f"{len(entries_to_dump)} entries...", end=" ", flush=True)
|
print(f"{len(entries_to_dump)} entries...", end=" ", flush=True)
|
||||||
@ -194,6 +205,11 @@ if __name__ == "__main__":
|
|||||||
default=OutputType.FULL.value,
|
default=OutputType.FULL.value,
|
||||||
help="Type of output to generate",
|
help="Type of output to generate",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--remove-non-suggesting",
|
||||||
|
action="store_true",
|
||||||
|
help="Applies only when output type is full. When this flag is given, removes the entries that don't suggest change",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
dataset = Dataset.from_json(args.filename)
|
dataset = Dataset.from_json(args.filename)
|
||||||
@ -206,5 +222,5 @@ if __name__ == "__main__":
|
|||||||
print("Exiting without saving.")
|
print("Exiting without saving.")
|
||||||
exit(0)
|
exit(0)
|
||||||
print(f"Saving dataset to {args.output},", end=" ", flush=True)
|
print(f"Saving dataset to {args.output},", end=" ", flush=True)
|
||||||
dataset.to_json(args.output, OutputType(args.output_type))
|
dataset.to_json(args.output, OutputType(args.output_type), args.remove_non_suggesting)
|
||||||
print("Done")
|
print("Done")
|
||||||
|
Reference in New Issue
Block a user