first draft of using cache to resume progress

Karma Riuk
2025-03-23 09:52:43 +01:00
parent 2481adc1bf
commit dc897ac375
2 changed files with 47 additions and 5 deletions

@@ -38,3 +38,32 @@ class Dataset:
        """Serialize the dataset to a JSON file"""
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(self, f, default=lambda o: o.__dict__, indent=4)
    @staticmethod
    def from_json(filename: str) -> "Dataset":
        """Deserialize a dataset from a JSON file produced by to_json"""
        with open(filename) as f:
            data = json.load(f)

        entries = []
        for entry_data in data["entries"]:
            metadata_data = entry_data["metadata"]
            if "commented_file" not in metadata_data:
                print(
                    f"Warning: missing commented_file in metadata in entry {metadata_data['repo']}/{metadata_data['pr_number']}"
                )
                print(metadata_data.keys())
            metadata = Metadata(**metadata_data)

            files = {
                fname: FileData(**fdata)
                for fname, fdata in entry_data["files"].items()
            }

            entry = DatasetEntry(
                metadata=metadata,
                files=files,
                diffs_before=entry_data["diffs_before"],
                comment=entry_data["comment"],
                diffs_after=entry_data["diffs_after"],
            )
            entries.append(entry)

        return Dataset(entries=entries)
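
A minimal sketch of how this cache could drive the "resume progress" idea: reload a previously serialized dataset with from_json, skip entries that are already present, and re-serialize after each new one. The cache path, the process_pull_request helper, the pull_requests iterable of (repo, pr_number) pairs, and the assumption that (repo, pr_number) uniquely identifies an entry are all hypothetical, not part of this commit.

import os

# Hypothetical cache location; adjust to wherever the pipeline writes its JSON.
CACHE_FILE = "dataset_cache.json"

def build_dataset(pull_requests) -> Dataset:
    # Resume from the cached dataset if a previous run left one behind.
    if os.path.exists(CACHE_FILE):
        dataset = Dataset.from_json(CACHE_FILE)
    else:
        dataset = Dataset(entries=[])

    # Keys of entries already processed (assumed unique per pull request).
    done = {(e.metadata.repo, e.metadata.pr_number) for e in dataset.entries}

    for repo, pr_number in pull_requests:
        if (repo, pr_number) in done:
            continue  # already in the cache, skip it
        entry = process_pull_request(repo, pr_number)  # hypothetical helper
        dataset.entries.append(entry)
        dataset.to_json(CACHE_FILE)  # checkpoint after every entry

    return dataset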